hakmem/core/hakmem_tiny_free.inc
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00


#include <inttypes.h>
#include "tiny_remote.h"
#include "slab_handle.h"
#include "tiny_refill.h"
#include "tiny_tls_guard.h"
#include "mid_tcache.h"
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
#if !HAKMEM_BUILD_RELEASE
#include "hakmem_tiny_magazine.h"
#endif
extern int g_tiny_force_remote;
// ENV: HAKMEM_TINY_DRAIN_TO_SLL (0=off) — at adopt/bind boundaries, splice up to N freelist nodes into the TLS SLL
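// Example (illustrative only): HAKMEM_TINY_DRAIN_TO_SLL=32 ./app — splice up to 32 nodes
// per adopt/bind; values are clamped to [0, 256] below.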
static inline int tiny_drain_to_sll_budget(void) {
static int v = -1;
if (__builtin_expect(v == -1, 0)) {
const char* s = getenv("HAKMEM_TINY_DRAIN_TO_SLL");
int parsed = (s && *s) ? atoi(s) : 0;
if (parsed < 0) parsed = 0; if (parsed > 256) parsed = 256;
v = parsed;
}
return v;
}
static inline void tiny_drain_freelist_to_sll_once(SuperSlab* ss, int slab_idx, int class_idx) {
int budget = tiny_drain_to_sll_budget();
if (__builtin_expect(budget <= 0, 1)) return;
if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
if (slab_idx < 0) return;
TinySlabMeta* m = &ss->slabs[slab_idx];
int moved = 0;
while (m->freelist && moved < budget) {
void* p = m->freelist;
m->freelist = *(void**)p;
*(void**)p = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = p;
g_tls_sll_count[class_idx]++;
moved++;
}
}
static inline int tiny_remote_queue_contains_guard(SuperSlab* ss, int slab_idx, void* target) {
if (!ss || slab_idx < 0) return 0;
uintptr_t cur = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
int limit = 8192;
while (cur && limit-- > 0) {
if ((void*)cur == target) {
return 1;
}
uintptr_t next;
if (__builtin_expect(g_remote_side_enable, 0)) {
next = tiny_remote_side_get(ss, slab_idx, (void*)cur);
} else {
next = atomic_load_explicit((_Atomic uintptr_t*)cur, memory_order_relaxed);
}
cur = next;
}
if (limit <= 0) {
return 1; // fail-safe: treat unbounded traversal as duplicate
}
return 0;
}
// Phase 6.12.1: Free with pre-calculated slab (Option C - avoids duplicate lookup)
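// Dispatch overview (descriptive): slab == NULL selects the SuperSlab path below
// (front caches → TLS SLL/lists → magazine → spill under the class lock), while a
// non-NULL slab takes the TinySlab path (same-thread TLS caches, remote MPSC push otherwise).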
void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// Phase 7.6: slab == NULL means SuperSlab mode (Magazine integration)
if (!slab) {
// SuperSlab path: Get class_idx from SuperSlab
SuperSlab* ss = hak_super_lookup(ptr);
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
int class_idx = ss->size_class;
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t ss_base = (uintptr_t)ss;
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFFu, ss, (uintptr_t)ss->size_class);
return;
}
// Optional: cross-lookup TinySlab owner and detect class mismatch early
if (__builtin_expect(g_tiny_safe_free, 0)) {
TinySlab* ts = hak_tiny_owner_slab(ptr);
if (ts) {
int ts_cls = ts->class_idx;
if (ts_cls >= 0 && ts_cls < TINY_NUM_CLASSES && ts_cls != class_idx) {
uint32_t code = 0xAA00u | ((uint32_t)ts_cls & 0xFFu);
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
}
}
}
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
// Detect cross-thread: cross-thread free MUST go via superslab path
int slab_idx = slab_index_for(ss, ptr);
int ss_cap = ss_slabs_capacity(ss);
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
return;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[class_idx];
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uint32_t code = 0xA104u;
if (align_ok) code |= 0x2u;
if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
uint32_t self_tid = tiny_self_u32();
if (__builtin_expect(meta->owner_tid != self_tid, 0)) {
// route directly to superslab (remote queue / freelist)
uintptr_t ptr_val = (uintptr_t)ptr;
uintptr_t ss_base = (uintptr_t)ss;
size_t ss_size = (size_t)1ULL << ss->lg_size;
if (__builtin_expect(ptr_val < ss_base || ptr_val >= ss_base + ss_size, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFDu, ss, ptr_val);
return;
}
tiny_debug_ring_record(TINY_RING_EVENT_FREE_REMOTE, (uint16_t)class_idx, ss, (uintptr_t)ptr);
hak_tiny_free_superslab(ptr, ss);
HAK_STAT_FREE(class_idx);
return;
}
if (__builtin_expect(g_debug_fast0, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
void* prev = meta->freelist;
*(void**)ptr = prev;
meta->freelist = ptr;
meta->used--;
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)ss->size_class, ss);
}
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
HAK_STAT_FREE(class_idx);
return;
}
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
if (tiny_fast_push(class_idx, ptr)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
HAK_STAT_FREE(class_idx);
return;
}
}
if (g_tls_list_enable) {
TinyTLSList* tls = &g_tls_lists[class_idx];
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
// TinyHotMag front push (8/16/32B classes), A/B gated
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
if (hotmag_push(class_idx, ptr)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
HAK_STAT_FREE(class_idx);
return;
}
}
if (tls->count < tls->cap) {
tiny_tls_list_guard_push(class_idx, tls, ptr);
tls_list_push(tls, ptr);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
HAK_STAT_FREE(class_idx);
return;
}
seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
tiny_tls_list_guard_push(class_idx, tls, ptr);
tls_list_push(tls, ptr);
if (tls_list_should_spill(tls)) {
tls_list_spill_excess(class_idx, tls);
}
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 2);
HAK_STAT_FREE(class_idx);
return;
}
#if !HAKMEM_BUILD_RELEASE
// SuperSlab uses Magazine for TLS caching (same as TinySlab)
tiny_small_mags_init_once();
if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: prefer SLL; magazine-first is disabled
// Prefer TinyQuickSlot (compile-out if HAKMEM_TINY_NO_QUICK)
#if !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
qs->items[qs->top++] = ptr;
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
// Fast path: TLS SLL push for hottest classes
if (!g_tls_list_enable && g_tls_sll_enable && g_tls_sll_count[class_idx] < sll_cap_for_class(class_idx, (uint32_t)cap)) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
// Active → Inactive: count down immediately (blocks parked in TLS are not "in use")
ss_active_dec_one(ss);
HAK_TP1(sll_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
// Next: Magazine push — if the magazine is full, bulk-transfer mag→SLL to make room
// Hysteresis: allow slight overfill before deciding to spill under lock
if (mag->top >= cap && g_spill_hyst > 0) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
if (mag->top < cap + g_spill_hyst) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = NULL; // SuperSlab owner not a TinySlab; leave NULL
#endif
mag->top++;
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Active → Inactive: decrement now — treated as inactive once the application frees it
ss_active_dec_one(ss);
HAK_TP1(mag_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 2);
HAK_STAT_FREE(class_idx);
return;
}
// Background spill: queue to BG thread instead of locking (when enabled)
if (g_bg_spill_enable) {
uint32_t qlen = atomic_load_explicit(&g_bg_spill_len[class_idx], memory_order_relaxed);
if ((int)qlen < g_bg_spill_target) {
// Build a small chain: include current ptr and pop from mag up to limit
int limit = g_bg_spill_max_batch;
if (limit > cap/2) limit = cap/2;
if (limit > 32) limit = 32; // keep free-path bounded
void* head = ptr;
*(void**)head = NULL;
void* tail = head; // current tail
int taken = 1;
while (taken < limit && mag->top > 0) {
void* p2 = mag->items[--mag->top].ptr;
*(void**)p2 = head;
head = p2;
taken++;
}
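// Resulting chain: head -> ... -> ptr (tail); nodes popped from the magazine are
// prepended, so `tail` is still the original ptr whose next pointer is NULL.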
// Push chain to spill queue (single CAS)
bg_spill_push_chain(class_idx, head, tail, taken);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
}
// Spill half (SuperSlab version - simpler than TinySlab)
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
hkm_prof_begin(NULL);
pthread_mutex_lock(lock);
// Batch spill: reduce lock frequency and work per call
int spill = cap / 2;
int over = mag->top - (cap + g_spill_hyst);
if (over > 0 && over < spill) spill = over;
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: SuperSlab spill - return to freelist
SuperSlab* owner_ss = hak_super_lookup(it.ptr);
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
// Direct freelist push (same as old hak_tiny_free_superslab)
int slab_idx = slab_index_for(owner_ss, it.ptr);
TinySlabMeta* meta = &owner_ss->slabs[slab_idx];
*(void**)it.ptr = meta->freelist;
meta->freelist = it.ptr;
meta->used--;
// Decrement SuperSlab active counter (spill returns blocks to SS)
ss_active_dec_one(owner_ss);
// Phase 8.4: Empty SuperSlab detection (will use meta->used scan)
// TODO: Implement scan-based empty detection
// Empty SuperSlab detection/munmap is handled by a separate flush API (kept off the hot path)
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
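// Growth is multiplicative (cap += cap/2), e.g. 64 -> 96 -> 144, clamped to the
// per-class max and TINY_TLS_MAG_CAP.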
// Finally, try FastCache push first (≤128B) — compile-out if HAKMEM_TINY_NO_FRONT_CACHE
#if !defined(HAKMEM_TINY_NO_FRONT_CACHE)
if (g_fastcache_enable && class_idx <= 4) {
if (fastcache_push(class_idx, ptr)) {
HAK_TP1(front_push, class_idx);
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
// Then TLS SLL if room, else magazine
if (g_tls_sll_enable && g_tls_sll_count[class_idx] < sll_cap_for_class(class_idx, (uint32_t)mag->cap)) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
} else {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
HAK_STAT_FREE(class_idx);
return;
#endif // HAKMEM_BUILD_RELEASE
}
// Phase 7.6: TinySlab path (original)
//g_tiny_free_with_slab_count++; // Phase 7.6: Track calls - DISABLED due to segfault
// Same-thread → TLS magazine; remote-thread → MPSC stack
if (pthread_equal(slab->owner_tid, tiny_self_pt())) {
int class_idx = slab->class_idx;
if (g_tls_list_enable) {
TinyTLSList* tls = &g_tls_lists[class_idx];
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
// TinyHotMag front push (8/16/32B classes), A/B gated
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
if (hotmag_push(class_idx, ptr)) {
HAK_STAT_FREE(class_idx);
return;
}
}
if (tls->count < tls->cap) {
tiny_tls_list_guard_push(class_idx, tls, ptr);
tls_list_push(tls, ptr);
HAK_STAT_FREE(class_idx);
return;
}
seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
tiny_tls_list_guard_push(class_idx, tls, ptr);
tls_list_push(tls, ptr);
if (tls_list_should_spill(tls)) {
tls_list_spill_excess(class_idx, tls);
}
HAK_STAT_FREE(class_idx);
return;
}
tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: prefer SLL; magazine-first is disabled
// Fast path: FastCache push (preferred for ≤128B), then TLS SLL
if (g_fastcache_enable && class_idx <= 4) {
if (fastcache_push(class_idx, ptr)) {
HAK_STAT_FREE(class_idx);
return;
}
}
// Fast path: TLS SLL push (preferred)
if (!g_tls_list_enable && g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap);
if (g_tls_sll_count[class_idx] < sll_cap) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
HAK_STAT_FREE(class_idx);
return;
}
}
// Next: if the magazine has room, push immediately and return; if full, bulk-transfer mag→SLL first
if (mag->top >= cap) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
// Remote-drain can be handled opportunistically on future calls.
if (mag->top < cap) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
}
// Magazine full: before spilling, opportunistically drain remotes once under lock.
if (atomic_load_explicit(&slab->remote_count, memory_order_relaxed) >= (unsigned)g_remote_drain_thresh_per_class[class_idx] || atomic_load_explicit(&slab->remote_head, memory_order_acquire)) {
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
HAK_TP1(remote_drain, class_idx);
tiny_remote_drain_locked(slab);
pthread_mutex_unlock(lock);
}
// Spill half under class lock
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
int spill = cap / 2;
// Phase 4.2: High-water threshold for gating Phase 4 logic
int high_water = (cap * 3) / 4; // 75% of capacity
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: Check for SuperSlab first (mixed Magazine support)
SuperSlab* ss_owner = hak_super_lookup(it.ptr);
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
// SuperSlab spill - return to freelist
int slab_idx = slab_index_for(ss_owner, it.ptr);
TinySlabMeta* meta = &ss_owner->slabs[slab_idx];
*(void**)it.ptr = meta->freelist;
meta->freelist = it.ptr;
meta->used--;
// Empty-SuperSlab handling is done via flush/background processing (kept off the hot path)
HAK_STAT_FREE(class_idx);
continue; // Skip TinySlab processing
}
TinySlab* owner =
#if HAKMEM_TINY_MAG_OWNER
it.owner;
#else
NULL;
#endif
if (!owner) {
owner = tls_active_owner_for_ptr(class_idx, it.ptr);
}
if (!owner) {
owner = hak_tiny_owner_slab(it.ptr);
}
if (!owner) continue;
// Phase 4.2: Adaptive gating - skip Phase 4 when TLS Magazine is high-water
// Rationale: When mag->top >= 75%, next alloc will come from TLS anyway
// so pushing to mini-mag is wasted work
int is_high_water = (mag->top >= high_water);
if (!is_high_water) {
// Low-water: Phase 4.1 logic (try mini-magazine first)
uint8_t cidx = owner->class_idx; // Option A: read class_idx only once
TinySlab* tls_a = g_tls_active_slab_a[cidx];
TinySlab* tls_b = g_tls_active_slab_b[cidx];
// Option B: branch prediction hint (spill back to the TLS-active slab is the likely case)
if (__builtin_expect((owner == tls_a || owner == tls_b) &&
!mini_mag_is_full(&owner->mini_mag), 1)) {
// Fast path: return to the mini-magazine without touching the bitmap
mini_mag_push(&owner->mini_mag, it.ptr);
HAK_TP1(spill_tiny, cidx);
HAK_STAT_FREE(cidx);
continue; // skip bitmap update
}
}
// High-water or Phase 4.1 mini-mag full: fall through to bitmap
// Slow path: write the bitmap directly (existing logic)
size_t bs = g_tiny_class_sizes[owner->class_idx];
int idx = ((uintptr_t)it.ptr - (uintptr_t)owner->base) / bs;
if (hak_tiny_is_used(owner, idx)) {
hak_tiny_set_free(owner, idx);
int was_full = (owner->free_count == 0);
owner->free_count++;
if (was_full) move_to_free_list(owner->class_idx, owner);
if (owner->free_count == owner->total_count) {
// If this slab is TLS-active for this thread, clear the pointer before releasing
if (g_tls_active_slab_a[owner->class_idx] == owner) g_tls_active_slab_a[owner->class_idx] = NULL;
if (g_tls_active_slab_b[owner->class_idx] == owner) g_tls_active_slab_b[owner->class_idx] = NULL;
TinySlab** headp = &g_tiny_pool.free_slabs[owner->class_idx];
TinySlab* prev = NULL;
for (TinySlab* s = *headp; s; prev = s, s = s->next) {
if (s == owner) { if (prev) prev->next = s->next; else *headp = s->next; break; }
}
release_slab(owner);
}
HAK_TP1(spill_tiny, owner->class_idx);
HAK_STAT_FREE(owner->class_idx);
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
// Finally: prefer TinyQuickSlot → SLL → UltraFront → HotMag → Magazine, in that order, to preserve locality
#if !HAKMEM_BUILD_RELEASE && !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
qs->items[qs->top++] = ptr;
} else if (g_tls_sll_enable) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll_count[class_idx] < sll_cap2) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
} else if (!tiny_optional_push(class_idx, ptr)) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
if (!tiny_optional_push(class_idx, ptr)) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else
#endif
{
if (g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll_count[class_idx] < sll_cap2) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
} else if (!tiny_optional_push(class_idx, ptr)) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
if (!tiny_optional_push(class_idx, ptr)) {
mag->items[mag->top].ptr = ptr;
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
} else {
tiny_remote_push(slab, ptr);
}
}
// ============================================================================
// Phase 6.23: SuperSlab Allocation Helpers
// ============================================================================
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
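// Two allocation modes (descriptive): while freelist == NULL and used < capacity the
// slab is carved linearly (block = slab_start + used * block_size); once frees have
// built a freelist, blocks are popped from it instead.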
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
TinySlabMeta* meta = &ss->slabs[slab_idx];
// Ensure remote queue is drained before handing blocks back to TLS
if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0) {
uint32_t self_tid = tiny_self_u32();
SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
if (slab_is_valid(&h)) {
slab_drain_remote_full(&h);
int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
if (__builtin_expect(pending, 0)) {
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
tiny_remote_watch_note("alloc_pending_remote",
ss,
slab_idx,
(void*)head,
0xA243u,
self_tid,
0);
}
slab_release(&h);
return NULL;
}
slab_release(&h);
} else {
if (__builtin_expect(g_debug_remote_guard, 0)) {
tiny_remote_watch_note("alloc_acquire_fail",
ss,
slab_idx,
meta,
0xA244u,
self_tid,
0);
}
return NULL;
}
}
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
if (head_pending != 0) {
tiny_remote_watch_note("alloc_remote_pending",
ss,
slab_idx,
(void*)head_pending,
0xA247u,
tiny_self_u32(),
1);
return NULL;
}
}
// Phase 6.24: Linear allocation mode (freelist == NULL)
// This avoids the 4000-8000 cycle cost of building freelist on init
if (meta->freelist == NULL && meta->used < meta->capacity) {
// Linear allocation: sequential memory access (cache-friendly!)
size_t block_size = g_tiny_class_sizes[ss->size_class];
void* slab_start = slab_data_start(ss, slab_idx);
// First slab: skip SuperSlab header
if (slab_idx == 0) {
slab_start = (char*)slab_start + 1024;
}
void* block = (char*)slab_start + (meta->used * block_size);
meta->used++;
tiny_remote_track_on_alloc(ss, slab_idx, block, "linear_alloc", 0);
tiny_remote_assert_not_remote(ss, slab_idx, block, "linear_alloc_ret", 0);
return block; // Fast path: O(1) pointer arithmetic
}
// Freelist mode (after first free())
if (meta->freelist) {
void* block = meta->freelist;
meta->freelist = *(void**)block; // Pop from freelist
meta->used++;
tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
return block;
}
return NULL; // Slab is full
}
// Phase 6.24 & 7.6: Refill TLS SuperSlab (with unified TLS cache + deferred allocation)
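// Refill priority (descriptive summary of the paths below):
//   0) optional mid-size simple refill (class >= 4, env-gated)
//   1) adopt a published partial SuperSlab for this class
//   2) reuse a slab with a freelist inside the current TLS SuperSlab
//   3) take a virgin slab from the current TLS SuperSlab
//   4) scan the registry for another SuperSlab of this class
//   5) must-adopt gate (sticky/hot/bench/mailbox/registry window)
//   6) allocate a brand-new SuperSlab (may fail with OOM)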
static SuperSlab* superslab_refill(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
g_superslab_refill_calls_dbg[class_idx]++;
#endif
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
static int g_ss_adopt_en = -1; // env: HAKMEM_TINY_SS_ADOPT=1; default auto-on if remote seen
if (g_ss_adopt_en == -1) {
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
if (e) {
g_ss_adopt_en = (*e != '0') ? 1 : 0;
} else {
extern _Atomic int g_ss_remote_seen;
g_ss_adopt_en = (atomic_load_explicit(&g_ss_remote_seen, memory_order_relaxed) != 0) ? 1 : 0;
}
}
extern int g_adopt_cool_period;
extern __thread int g_tls_adopt_cd[];
if (g_adopt_cool_period == -1) {
char* cd = getenv("HAKMEM_TINY_SS_ADOPT_COOLDOWN");
int v = (cd ? atoi(cd) : 0);
if (v < 0) v = 0; if (v > 1024) v = 1024;
g_adopt_cool_period = v;
}
static int g_superslab_refill_debug_once = 0;
SuperSlab* prev_ss = tls->ss;
TinySlabMeta* prev_meta = tls->meta;
uint8_t prev_slab_idx = tls->slab_idx;
uint8_t prev_active = prev_ss ? prev_ss->active_slabs : 0;
uint32_t prev_bitmap = prev_ss ? prev_ss->slab_bitmap : 0;
uint32_t prev_meta_used = prev_meta ? prev_meta->used : 0;
uint32_t prev_meta_cap = prev_meta ? prev_meta->capacity : 0;
int free_idx_attempted = -2; // -2 = not evaluated, -1 = none, >=0 = chosen
int reused_slabs = 0;
// Optional: Mid-size simple refill to avoid multi-layer scans (class>=4)
do {
static int g_mid_simple_warn = 0;
if (class_idx >= 4 && tiny_mid_refill_simple_enabled()) {
// If current TLS has a SuperSlab, prefer taking a virgin slab directly
if (tls->ss) {
int tls_cap = ss_slabs_capacity(tls->ss);
if (tls->ss->active_slabs < tls_cap) {
int free_idx = superslab_find_free_slab(tls->ss);
if (free_idx >= 0) {
uint32_t my_tid = tiny_self_u32();
superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);
tiny_tls_bind_slab(tls, tls->ss, free_idx);
return tls->ss;
}
}
}
// Otherwise allocate a fresh SuperSlab and bind first slab
SuperSlab* ssn = superslab_allocate((uint8_t)class_idx);
if (!ssn) {
if (!g_superslab_refill_debug_once && g_mid_simple_warn < 2) {
g_mid_simple_warn++;
int err = errno;
fprintf(stderr, "[DEBUG] mid_simple_refill OOM class=%d errno=%d\n", class_idx, err);
}
return NULL;
}
uint32_t my_tid = tiny_self_u32();
superslab_init_slab(ssn, 0, g_tiny_class_sizes[class_idx], my_tid);
SuperSlab* old = tls->ss;
tiny_tls_bind_slab(tls, ssn, 0);
superslab_ref_inc(ssn);
if (old && old != ssn) { superslab_ref_dec(old); }
return ssn;
}
} while (0);
// First, try to adopt a published partial SuperSlab for this class
if (g_ss_adopt_en) {
if (g_adopt_cool_period > 0) {
if (g_tls_adopt_cd[class_idx] > 0) {
g_tls_adopt_cd[class_idx]--;
} else {
// eligible to adopt
}
}
if (g_adopt_cool_period == 0 || g_tls_adopt_cd[class_idx] == 0) {
SuperSlab* adopt = ss_partial_adopt(class_idx);
if (adopt && adopt->magic == SUPERSLAB_MAGIC) {
int best = -1;
uint32_t best_score = 0;
int adopt_cap = ss_slabs_capacity(adopt);
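// Scoring (descriptive): a slab with a non-empty freelist dominates (bit 30);
// the remote count and a pending remote head act as tie-breakers.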
for (int s = 0; s < adopt_cap; s++) {
TinySlabMeta* m = &adopt->slabs[s];
uint32_t rc = atomic_load_explicit(&adopt->remote_counts[s], memory_order_relaxed);
int has_remote = (atomic_load_explicit(&adopt->remote_heads[s], memory_order_acquire) != 0);
uint32_t score = rc + (m->freelist ? (1u<<30) : 0u) + (has_remote ? 1u : 0u);
if (score > best_score) {
best_score = score;
best = s;
}
}
if (best >= 0) {
// Box: Try to acquire ownership atomically
uint32_t self = tiny_self_u32();
SlabHandle h = slab_try_acquire(adopt, best, self);
if (slab_is_valid(&h)) {
slab_drain_remote_full(&h);
if (slab_remote_pending(&h)) {
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed);
tiny_remote_watch_note("adopt_remote_pending",
h.ss,
h.slab_idx,
(void*)head,
0xA255u,
self,
0);
}
// Remote still pending; give up adopt path and fall through to normal refill.
slab_release(&h);
}
// Box 4 boundary: bind must guarantee remote_head == 0;
// slab_is_safe_to_bind() checks this in a TOCTOU-safe way.
if (slab_is_safe_to_bind(&h)) {
// Optional: move a few nodes to Front SLL to boost next hits
tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
// Safe to bind (freelist present && remote_head == 0 guaranteed)
tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
if (g_adopt_cool_period > 0) {
g_tls_adopt_cd[class_idx] = g_adopt_cool_period;
}
return h.ss;
}
// Safe-to-bind failed (no freelist or remote pending) → abandon the adopt path
slab_release(&h);
}
// Failed to acquire or no freelist - continue searching
}
// If no freelist found, ignore and continue (optional: republish)
}
}
}
// Phase 7.6 Step 4: Check existing SuperSlab with priority order
if (tls->ss) {
// Priority 1: Reuse slabs with freelist (already freed blocks)
int tls_cap = ss_slabs_capacity(tls->ss);
uint32_t nonempty_mask = 0;
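// nonempty_mask: bit i set means slabs[i].freelist is non-empty. With
// HAKMEM_TINY_FREELIST_MASK the mask is read from the atomically maintained field;
// otherwise it is rebuilt here by a linear scan.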
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0)) {
nonempty_mask = atomic_load_explicit(&tls->ss->freelist_mask, memory_order_acquire);
break;
}
for (int i = 0; i < tls_cap; i++) {
if (tls->ss->slabs[i].freelist) nonempty_mask |= (1u << i);
}
} while (0);
// O(1) lookup: scan mask with ctz (1 instruction!)
while (__builtin_expect(nonempty_mask != 0, 1)) {
int i = __builtin_ctz(nonempty_mask); // Find first non-empty slab (O(1))
nonempty_mask &= ~(1u << i); // Clear bit for next iteration
// FIX #1 DELETED (Race condition fix):
// Previous drain without ownership caused concurrent freelist corruption.
// Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h).
// Remote frees will be drained when the slab is adopted (see tiny_refill.h paths).
uint32_t self_tid = tiny_self_u32();
SlabHandle h = slab_try_acquire(tls->ss, i, self_tid);
if (slab_is_valid(&h)) {
if (slab_remote_pending(&h)) {
slab_drain_remote_full(&h);
if (__builtin_expect(g_debug_remote_guard, 0)) {
uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed);
tiny_remote_watch_note("reuse_remote_pending",
h.ss,
h.slab_idx,
(void*)head,
0xA254u,
self_tid,
0);
}
slab_release(&h);
continue;
}
// Box 4 boundary: bind must guarantee remote_head == 0
if (slab_is_safe_to_bind(&h)) {
// Optional: move a few nodes to Front SLL to boost next hits
tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
reused_slabs = 1;
tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
return h.ss;
}
// Not safe to bind → try the next slab
slab_release(&h);
}
}
// Priority 2: Use unused slabs (virgin slabs)
if (tls->ss->active_slabs < tls_cap) {
// Find next free slab
int free_idx = superslab_find_free_slab(tls->ss);
free_idx_attempted = free_idx;
if (free_idx >= 0) {
// Initialize this slab
uint32_t my_tid = tiny_self_u32();
superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);
// Update TLS cache (unified update)
tiny_tls_bind_slab(tls, tls->ss, free_idx);
return tls->ss;
}
}
}
// Try to adopt a partial SuperSlab from registry (one-shot, cheap scan)
// This reduces pressure to allocate new SS when other threads freed blocks.
if (!tls->ss) {
// Best-effort: scan a small window of registry for our class
extern SuperRegEntry g_super_reg[];
int scanned = 0;
const int scan_max = tiny_reg_scan_max();
for (int i = 0; i < SUPER_REG_SIZE && scanned < scan_max; i++) {
SuperRegEntry* e = &g_super_reg[i];
uintptr_t base = atomic_load_explicit((_Atomic uintptr_t*)&e->base, memory_order_acquire);
if (base == 0) continue;
SuperSlab* ss = atomic_load_explicit(&e->ss, memory_order_acquire);
if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
if ((int)ss->size_class != class_idx) { scanned++; continue; }
// Pick the first slab with a freelist (Box 4: acquire ownership + remote check)
int reg_cap = ss_slabs_capacity(ss);
uint32_t self_tid = tiny_self_u32();
for (int s = 0; s < reg_cap; s++) {
if (ss->slabs[s].freelist) {
SlabHandle h = slab_try_acquire(ss, s, self_tid);
if (slab_is_valid(&h)) {
slab_drain_remote_full(&h);
if (slab_is_safe_to_bind(&h)) {
tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
tiny_tls_bind_slab(tls, ss, s);
return ss;
}
slab_release(&h);
}
}
}
scanned++;
}
}
// Must-adopt-before-mmap gate: attempt sticky/hot/bench/mailbox/registry small-window
{
SuperSlab* gate_ss = tiny_must_adopt_gate(class_idx, tls);
if (gate_ss) return gate_ss;
}
// Allocate new SuperSlab
SuperSlab* ss = superslab_allocate((uint8_t)class_idx);
if (!ss) {
if (!g_superslab_refill_debug_once) {
g_superslab_refill_debug_once = 1;
int err = errno;
fprintf(stderr,
"[DEBUG] superslab_refill NULL detail: class=%d prev_ss=%p active=%u bitmap=0x%08x prev_meta=%p used=%u cap=%u slab_idx=%u reused_freelist=%d free_idx=%d errno=%d\n",
class_idx,
(void*)prev_ss,
(unsigned)prev_active,
prev_bitmap,
(void*)prev_meta,
(unsigned)prev_meta_used,
(unsigned)prev_meta_cap,
(unsigned)prev_slab_idx,
reused_slabs,
free_idx_attempted,
err);
}
return NULL; // OOM
}
// Initialize first slab
uint32_t my_tid = tiny_self_u32();
superslab_init_slab(ss, 0, g_tiny_class_sizes[class_idx], my_tid);
// Cache in unified TLS; release the reference to the previous SuperSlab
SuperSlab* old = tls->ss;
tiny_tls_bind_slab(tls, ss, 0);
// Maintain refcount — count the TLS reference in preparation for future empty-SS reclamation
superslab_ref_inc(ss);
if (old && old != ss) {
superslab_ref_dec(old);
}
return ss;
}
// Phase 6.24: SuperSlab-based allocation (TLS unified, Medium fix)
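// Fast-path order (descriptive): MidTC pop → linear carve from the bound TLS slab
// (after an opportunistic drain of its remote queue when ownership can be acquired) →
// TLS freelist pop → superslab_refill() and one retry of the linear path.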
static inline void* hak_tiny_alloc_superslab(int class_idx) {
// DEBUG: Function entry trace
tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, 0x01, (void*)(uintptr_t)class_idx, 0);
// MidTC fast path: 128..1024B (class >= 4) prefers the TLS tcache first
do {
void* mp = midtc_pop(class_idx);
if (mp) {
HAK_RET_ALLOC(class_idx, mp);
}
} while (0);
// Phase 6.24: 1 TLS read (down from 3)
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
TinySlabMeta* meta = tls->meta;
int slab_idx = tls->slab_idx;
if (meta && slab_idx >= 0 && tls->ss) {
// A/B: Relaxed read for remote head presence check
static int g_alloc_remote_relax = -1; // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 → relaxed
if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
}
uintptr_t pending = atomic_load_explicit(&tls->ss->remote_heads[slab_idx],
g_alloc_remote_relax ? memory_order_relaxed
: memory_order_acquire);
if (__builtin_expect(pending != 0, 0)) {
uint32_t self_tid = tiny_self_u32();
if (ss_owner_try_acquire(meta, self_tid)) {
_ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
}
}
}
// FIX #2 DELETED (Race condition fix):
// Previous drain-all-slabs without ownership caused concurrent freelist corruption.
// Problem: Thread A owns slab 5, Thread B drains all slabs including 5 → both modify freelist → crash.
// Ownership protocol: MUST bind+owner_cas BEFORE drain (see Fix #3 in tiny_refill.h).
// Remote frees will be drained when the slab is adopted via refill paths.
// Fast path: Direct metadata access (no repeated TLS reads!)
if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
// Linear allocation (lazy init)
size_t block_size = g_tiny_class_sizes[tls->ss->size_class];
void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size));
meta->used++;
// Track active blocks in SuperSlab for conservative reclamation
ss_active_inc(tls->ss);
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
}
if (meta && meta->freelist) {
// Freelist allocation
void* block = meta->freelist;
// Safety: bounds/alignment check (debug)
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[tls->ss->size_class];
uint8_t* base = tiny_slab_base_for(tls->ss, tls->slab_idx);
uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
int align_ok = ((delta % blk) == 0);
int range_ok = (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uintptr_t info = ((uintptr_t)(align_ok ? 1u : 0u) << 32) | (uint32_t)(range_ok ? 1u : 0u);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)tls->ss->size_class, block, info | 0xA100u);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
return NULL;
}
}
void* next = *(void**)block;
meta->freelist = next;
meta->used++;
// Optional: clear freelist bit when becomes empty
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0) && next == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_and_explicit(&tls->ss->freelist_mask, ~bit, memory_order_release);
}
} while (0);
// Track active blocks in SuperSlab for conservative reclamation
ss_active_inc(tls->ss);
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
}
// Slow path: Refill TLS slab
SuperSlab* ss = superslab_refill(class_idx);
if (!ss) {
static int log_oom = 0;
if (log_oom < 2) { fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n"); log_oom++; }
return NULL; // OOM
}
// Retry allocation (metadata already cached in superslab_refill)
meta = tls->meta;
// DEBUG: Check each condition (disabled for benchmarks)
// static int log_retry = 0;
// if (log_retry < 2) {
// fprintf(stderr, "[DEBUG] Retry alloc: meta=%p, freelist=%p, used=%u, capacity=%u, slab_base=%p\n",
// (void*)meta, meta ? meta->freelist : NULL,
// meta ? meta->used : 0, meta ? meta->capacity : 0,
// (void*)tls->slab_base);
// log_retry++;
// }
if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
size_t block_size = g_tiny_class_sizes[ss->size_class];
void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size));
// Disabled for benchmarks
// static int log_success = 0;
// if (log_success < 2) {
// fprintf(stderr, "[DEBUG] Superslab alloc SUCCESS: ptr=%p, class=%d, used=%u->%u\n",
// block, class_idx, meta->used, meta->used + 1);
// log_success++;
// }
meta->used++;
// Track active blocks in SuperSlab for conservative reclamation
ss_active_inc(ss);
HAK_RET_ALLOC(class_idx, block); // Phase 8.4: Zero hot-path overhead
}
// Disabled for benchmarks
// static int log_fail = 0;
// if (log_fail < 2) {
// fprintf(stderr, "[DEBUG] Retry alloc FAILED - returning NULL\n");
// log_fail++;
// }
return NULL;
}
// Phase 6.22-B: SuperSlab fast free path
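// Same-thread frees push onto the slab freelist (optionally via MidTC for class >= 4);
// cross-thread frees go to the per-slab remote MPSC queue, or directly to the freelist
// when HAKMEM_TINY_SS_ADOPT is explicitly disabled (legacy path).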
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees
// Get slab index (supports 1MB/2MB SuperSlabs)
int slab_idx = slab_index_for(ss, ptr);
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t ss_base = (uintptr_t)ss;
if (__builtin_expect(slab_idx < 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
extern __thread TinyTLSSlab g_tls_slabs[];
tiny_alloc_dump_tls_state(ss->size_class, "watch_free_enter", &g_tls_slabs[ss->size_class]);
#if !HAKMEM_BUILD_RELEASE
extern __thread TinyTLSMag g_tls_mags[];
TinyTLSMag* watch_mag = &g_tls_mags[ss->size_class];
fprintf(stderr,
"[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
ss->size_class,
watch_mag->top,
watch_mag->cap);
#endif
}
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[ss->size_class];
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uint32_t code = 0xA100u;
if (align_ok) code |= 0x2u;
if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// Duplicate in freelist (best-effort scan up to 64)
void* scan = meta->freelist; int scanned = 0; int dup = 0;
while (scan && scanned < 64) { if (scan == ptr) { dup = 1; break; } scan = *(void**)scan; scanned++; }
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
// Phase 6.23: Same-thread check
uint32_t my_tid = tiny_self_u32();
const int debug_guard = g_debug_remote_guard;
static __thread int g_debug_free_count = 0;
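// owner_tid == 0 means the slab currently has no owner; such frees are not treated as
// same-thread and fall through to the remote/diagnostic path below.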
if (!g_tiny_force_remote && meta->owner_tid != 0 && meta->owner_tid == my_tid) {
// Fast path: Direct freelist push (same-thread)
if (g_debug_free_count < 1) {
fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n",
meta->owner_tid, my_tid);
g_debug_free_count++;
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
int transitioned = ss_remote_push(ss, slab_idx, ptr);
meta->used--;
ss_active_dec_one(ss);
if (transitioned) {
ss_partial_publish((int)ss->size_class, ss);
}
return;
}
// Optional: MidTC (TLS tcache for 128..1024B)
do {
int cls = (int)ss->size_class;
if (midtc_enabled() && cls >= 4) {
if (midtc_push(cls, ptr)) {
// Treat as returned to TLS cache (not SS freelist)
meta->used--;
ss_active_dec_one(ss);
return;
}
}
} while (0);
void* prev = meta->freelist;
*(void**)ptr = prev;
meta->freelist = ptr;
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
tiny_remote_track_on_local_free(ss, slab_idx, ptr, "local_free", my_tid);
meta->used--;
// Decrement SuperSlab active counter (actual return to SS)
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)ss->size_class, ss);
}
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
ss->size_class, slab_idx, meta->owner_tid, my_tid, ptr, prev, meta->used);
}
// Empty-slab detection is handled elsewhere (off the hot path)
} else {
if (__builtin_expect(meta->owner_tid == my_tid && meta->owner_tid == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (debug_guard) {
fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
ss->size_class, slab_idx, ptr, my_tid, (unsigned)meta->used);
}
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
// Slow path: Remote free (cross-thread)
if (g_debug_free_count < 5) {
fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
meta->owner_tid, my_tid, slab_idx);
g_debug_free_count++;
}
if (__builtin_expect(g_tiny_safe_free, 0)) {
// Best-effort duplicate scan in remote stack (up to 64 nodes)
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
uintptr_t base = ss_base;
int scanned = 0; int dup = 0;
uintptr_t cur = head;
while (cur && scanned < 64) {
if ((cur < base) || (cur >= base + ss_size)) {
uintptr_t aux = tiny_remote_pack_diag(0xA200u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
if ((void*)cur == ptr) { dup = 1; break; }
if (__builtin_expect(g_remote_side_enable, 0)) {
if (!tiny_remote_sentinel_ok((void*)cur)) {
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
tiny_remote_report_corruption("scan", (void*)cur, observed);
fprintf(stderr,
"[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
ss->size_class,
slab_idx,
(void*)cur,
(void*)head,
ptr,
scanned,
observed,
meta->owner_tid,
(unsigned)meta->used,
meta->freelist,
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = tiny_remote_side_get(ss, slab_idx, (void*)cur);
} else {
if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
uintptr_t aux = tiny_remote_pack_diag(0xA201u, base, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = (uintptr_t)(*(void**)(void*)cur);
}
scanned++;
}
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
static int g_ss_adopt_en2 = -1; // env cached
if (g_ss_adopt_en2 == -1) {
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
// Default: use the remote queue (1); only an explicit env setting overrides it.
g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0);
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)");
}
}
if (g_ss_adopt_en2) {
// Use remote queue
uintptr_t head_word = __atomic_load_n((uintptr_t*)ptr, __ATOMIC_RELAXED);
fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
ss->size_class,
slab_idx,
meta->owner_tid,
my_tid,
ptr,
(unsigned)meta->used,
atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
head_word);
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, ptr);
if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, ptr);
}
if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
}
if (dup_remote) {
uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
// TLS guard scribble detected on the node's first word → same-pointer double free across routes
uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
tiny_remote_watch_mark(ptr, "pre_push", my_tid);
tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
tiny_remote_report_corruption("pre_push", ptr, head_word);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
}
int was_empty = ss_remote_push(ss, slab_idx, ptr);
meta->used--;
ss_active_dec_one(ss);
if (was_empty) {
ss_partial_publish((int)ss->size_class, ss);
}
} else {
// Fallback: direct freelist push (legacy)
fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
void* prev = meta->freelist;
*(void**)ptr = prev;
meta->freelist = ptr;
do {
static int g_mask_en = -1;
if (__builtin_expect(g_mask_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
g_mask_en = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
meta->used--;
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)ss->size_class, ss);
}
}
// Empty-slab detection is handled elsewhere (off the hot path)
}
}
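// Public free entry (descriptive): resolve the owner (SuperSlab registry first, then
// TinySlab), try the front caches, then hand off to hak_tiny_free_superslab() or
// hak_tiny_free_with_slab(); pointers not managed by the Tiny pool are ignored.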
void hak_tiny_free(void* ptr) {
if (!ptr || !g_tiny_initialized) return;
hak_tiny_stats_poll();
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, 0, ptr, 0);
#ifdef HAKMEM_TINY_BENCH_SLL_ONLY
// Bench-only SLL-only free: push to TLS SLL for ≤64B when possible
{
int class_idx = -1;
if (g_use_superslab) {
// FIXED: resolve the owner via registry-backed hak_super_lookup() to avoid false positives
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
}
if (class_idx < 0) {
TinySlab* slab = hak_tiny_owner_slab(ptr);
if (slab) class_idx = slab->class_idx;
}
if (class_idx >= 0 && class_idx <= 3) {
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
if ((int)g_tls_sll_count[class_idx] < (int)sll_cap) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
return;
}
}
}
#endif
if (g_tiny_ultra) {
int class_idx = -1;
if (g_use_superslab) {
// FIXED: resolve the owner via registry-backed hak_super_lookup() to avoid false positives
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) class_idx = ss->size_class;
}
if (class_idx < 0) {
TinySlab* slab = hak_tiny_owner_slab(ptr);
if (slab) class_idx = slab->class_idx;
}
if (class_idx >= 0) {
// Ultra free: push directly to TLS SLL without magazine init
int sll_cap = ultra_sll_cap_for_class(class_idx);
if ((int)g_tls_sll_count[class_idx] < sll_cap) {
*(void**)ptr = g_tls_sll_head[class_idx];
g_tls_sll_head[class_idx] = ptr;
g_tls_sll_count[class_idx]++;
return;
}
}
// Fallback to existing path if class resolution fails
}
SuperSlab* fast_ss = NULL;
TinySlab* fast_slab = NULL;
int fast_class_idx = -1;
if (g_use_superslab) {
fast_ss = hak_super_lookup(ptr);
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
fast_class_idx = fast_ss->size_class;
} else {
fast_ss = NULL;
}
}
if (fast_class_idx < 0) {
fast_slab = hak_tiny_owner_slab(ptr);
if (fast_slab) fast_class_idx = fast_slab->class_idx;
}
// Safety: detect class mismatch (SS vs TinySlab) early
if (__builtin_expect(g_tiny_safe_free && fast_class_idx >= 0, 0)) {
int ss_cls = -1, ts_cls = -1;
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) ss_cls = chk_ss->size_class;
TinySlab* chk_slab = fast_slab ? fast_slab : hak_tiny_owner_slab(ptr);
if (chk_slab) ts_cls = chk_slab->class_idx;
if (ss_cls >= 0 && ts_cls >= 0 && ss_cls != ts_cls) {
uintptr_t packed = ((uintptr_t)(uint16_t)ss_cls << 16) | (uint16_t)ts_cls;
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)fast_class_idx, ptr, packed);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
}
}
if (fast_class_idx >= 0) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)fast_class_idx, ptr, 1);
}
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
if (tiny_fast_push(fast_class_idx, ptr)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)fast_class_idx, ptr, 0);
HAK_STAT_FREE(fast_class_idx);
return;
}
}
// SuperSlab detection: prefer fast mask-based check when available
SuperSlab* ss = fast_ss;
if (!ss && g_use_superslab) {
ss = hak_super_lookup(ptr);
if (!(ss && ss->magic == SUPERSLAB_MAGIC)) {
ss = NULL;
}
}
if (ss && ss->magic == SUPERSLAB_MAGIC) {
// Direct SuperSlab free (avoid second lookup TOCTOU)
hak_tiny_free_superslab(ptr, ss);
HAK_STAT_FREE(ss->size_class);
return;
}
// Fallback to TinySlab only when SuperSlab is not in use
TinySlab* slab = fast_slab;
if (!slab) slab = hak_tiny_owner_slab(ptr);
if (!slab) return; // Not managed by Tiny Pool
if (__builtin_expect(g_use_superslab, 0)) {
// In SS mode, a pointer that resolves only to TinySlab is suspicious → treat as invalid free
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xEE, ptr, 0xF1u);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
hak_tiny_free_with_slab(ptr, slab);
}
// ============================================================================
// EXTRACTED TO hakmem_tiny_query.c (Phase 2B-1)
// ============================================================================
// EXTRACTED: int hak_tiny_is_managed(void* ptr) {
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
// EXTRACTED: // Phase 6.12.1: O(1) slab lookup via registry/list
// EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL || hak_super_lookup(ptr) != NULL;
// EXTRACTED: }
// Phase 7.6: Check if pointer is managed by Tiny Pool (TinySlab OR SuperSlab)
// EXTRACTED: int hak_tiny_is_managed_superslab(void* ptr) {
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
// EXTRACTED:
// EXTRACTED: // Safety: Only check if g_use_superslab is enabled
// EXTRACTED: if (g_use_superslab) {
// EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr);
// EXTRACTED: // Phase 8.2 optimization: Use alignment check instead of mincore()
// EXTRACTED: // SuperSlabs are always SUPERSLAB_SIZE-aligned (2MB)
// EXTRACTED: if (ss && ((uintptr_t)ss & (SUPERSLAB_SIZE - 1)) == 0) {
// EXTRACTED: if (ss->magic == SUPERSLAB_MAGIC) {
// EXTRACTED: return 1; // Valid SuperSlab pointer
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED:
// EXTRACTED: // Fallback to TinySlab check
// EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL;
// EXTRACTED: }
// Return the usable size for a Tiny-managed pointer (0 if unknown/not tiny).
// Prefer SuperSlab metadata when available; otherwise use TinySlab owner class.
// EXTRACTED: size_t hak_tiny_usable_size(void* ptr) {
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
// EXTRACTED:
// EXTRACTED: // Check SuperSlab first via registry (safe under direct link and LD)
// EXTRACTED: if (g_use_superslab) {
// EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr);
// EXTRACTED: if (ss && ss->magic == SUPERSLAB_MAGIC) {
// EXTRACTED: int k = (int)ss->size_class;
// EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) {
// EXTRACTED: return g_tiny_class_sizes[k];
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED:
// EXTRACTED: // Fallback: TinySlab owner lookup
// EXTRACTED: TinySlab* slab = hak_tiny_owner_slab(ptr);
// EXTRACTED: if (slab) {
// EXTRACTED: int k = slab->class_idx;
// EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) {
// EXTRACTED: return g_tiny_class_sizes[k];
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED: return 0;
// EXTRACTED: }
// ============================================================================
// Statistics and Debug Functions - Extracted to hakmem_tiny_stats.c
// ============================================================================
// (Phase 2B API headers moved to top of file)
// Optional shutdown hook to stop background components (e.g., Intelligence Engine)
void hak_tiny_shutdown(void) {
// Release TLS SuperSlab references (dec refcount) before stopping BG/INT
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
TinyTLSSlab* tls = &g_tls_slabs[k];
if (tls->ss) {
superslab_ref_dec(tls->ss);
tls->ss = NULL;
tls->meta = NULL;
tls->slab_base = NULL;
}
}
if (g_bg_bin_started) {
g_bg_bin_stop = 1;
if (!pthread_equal(tiny_self_pt(), g_bg_bin_thread)) {
pthread_join(g_bg_bin_thread, NULL);
}
g_bg_bin_started = 0;
g_bg_bin_enable = 0;
}
tiny_obs_shutdown();
if (g_int_engine && g_int_started) {
g_int_stop = 1;
// Best-effort join; avoid deadlock if called from within the thread
if (!pthread_equal(tiny_self_pt(), g_int_thread)) {
pthread_join(g_int_thread, NULL);
}
g_int_started = 0;
g_int_engine = 0;
}
}
// Always-available: Trim empty slabs (release fully-free slabs)