// tiny_free_magazine.inc.h - Magazine Layer for hak_tiny_free_with_slab()
// Purpose: TLS caching (TinyQuickSlot, TLS SLL, Magazine) and spill logic
// Extracted from: hakmem_tiny_free.inc lines 208-620
// Box Theory: Box 5 (TLS Cache) integration
//
// Context: This file is #included within hak_tiny_free_with_slab() function body
// Prerequisites: ss, meta, class_idx, ptr variables must be defined in calling scope
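//
// Illustrative inclusion shape (a sketch only; the real caller is
// hak_tiny_free_with_slab() and its exact signature may differ):
//
//   static void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
//       SuperSlab* ss = /* owning SuperSlab (or NULL) */;
//       TinySlabMeta* meta = /* per-slab metadata for ptr */;
//       int class_idx = /* tiny size class of ptr */;
//       #include "tiny_free_magazine.inc.h"
//   }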
#if !HAKMEM_BUILD_RELEASE
// SuperSlab uses Magazine for TLS caching (same as TinySlab)
tiny_small_mags_init_once();
if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: prefer the SLL (magazine-first is disabled)
// Prefer TinyQuickSlot (compile-out if HAKMEM_TINY_NO_QUICK)
#if !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
qs->items[qs->top++] = HAK_BASE_TO_RAW(base_ptr);
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
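// Note: TinyQuickSlot is a small per-class TLS LIFO array (QUICK_CAP entries);
// the push above is a plain store plus increment, so the hottest classes can
// often complete a free without touching the SLL or the magazine at all.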
// Fast path: TLS SLL push for hottest classes
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap);
if (!g_tls_list_enable && g_tls_sll_enable && g_tls_sll[class_idx].count < sll_cap) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (tls_sll_push(class_idx, base_ptr, sll_cap)) {
// BUGFIX: Decrement used counter (was missing, causing Fail-Fast on next free)
meta->used--;
// Active → Inactive: count down immediately (blocks parked in TLS are not "in use")
ss_active_dec_one(ss);
HAK_TP1(sll_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
}
// Next: Magazine push (if full, bulk-transfer mag→SLL first to make room)
// Hysteresis: allow slight overfill before deciding to spill under lock
if (mag->top >= cap && g_spill_hyst > 0) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
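// Worked example (illustrative values): with cap = 64 and g_spill_hyst = 8 the
// magazine keeps accepting pushes until top reaches 72; this overfill window
// gives the mag→SLL bulk transfer above a chance to make room before the
// locked spill path below is taken.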
if (mag->top < cap + g_spill_hyst) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = NULL; // SuperSlab owner not a TinySlab; leave NULL
#endif
mag->top++;
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Active → Inactive: decrement now (an app-level free makes the block inactive)
ss_active_dec_one(ss);
HAK_TP1(mag_push, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 2);
HAK_STAT_FREE(class_idx);
return;
}
// Background spill: queue to BG thread instead of locking (when enabled)
if (g_bg_spill_enable) {
uint32_t qlen = atomic_load_explicit(&g_bg_spill_len[class_idx], memory_order_relaxed);
if ((int)qlen < g_bg_spill_target) {
// Build a small chain: include current ptr and pop from mag up to limit
int limit = g_bg_spill_max_batch;
if (limit > cap/2) limit = cap/2;
if (limit > 32) limit = 32; // keep free-path bounded
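// Effective batch = min(g_bg_spill_max_batch, cap/2, 32); e.g. with cap = 64
// and max_batch = 128 (illustrative values) the chain is capped at 32 nodes.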
// Phase 10: Use hak_base_ptr_t
void* head = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
// Build single-linked list via Box next-ptr API (per-class); the per-class
// header offset (1 byte under HAKMEM_TINY_HEADER_CLASSIDX, else 0) is handled
// inside tiny_next_write(), so no offset bookkeeping is needed here
tiny_next_write(class_idx, head, NULL);
void* tail = head; // current tail
int taken = 1;
while (taken < limit && mag->top > 0) {
void* p2 = mag->items[--mag->top].ptr;
tiny_next_write(class_idx, p2, head);
head = p2;
taken++;
}
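// Resulting chain (illustrative, taken = 3): (2nd pop) -> (1st pop) -> ptr,
// where head is the most recently popped block and tail = ptr with NULL next;
// the BG thread can walk head..tail and splice the whole chain at once.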
// Push chain to spill queue (single CAS)
bg_spill_push_chain(class_idx, head, tail, taken);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 3);
HAK_STAT_FREE(class_idx);
return;
}
}
// Spill half (SuperSlab version - simpler than TinySlab)
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
// Profiling fix for debug build
struct timespec tss;
int ss_time = hkm_prof_begin(&tss);
pthread_mutex_lock(lock);
// Batch spill: reduce lock frequency and work per call
int spill = cap / 2;
int over = mag->top - (cap + g_spill_hyst);
if (over > 0 && over < spill) spill = over;
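// Sizing example (illustrative): with cap = 64 and g_spill_hyst = 8, reaching
// this point means top >= 72, so over = top - 72. A small overfill
// (0 < over < 32) spills just the excess; otherwise half the magazine
// (32 blocks) is spilled.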
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: SuperSlab spill - return to freelist
SuperSlab* owner_ss = hak_super_lookup(it.ptr);
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
// Direct freelist push (same as old hak_tiny_free_superslab)
// Phase 10: it.ptr is BASE.
// FIX: it.ptr is BASE, use it directly (do not subtract 1)
void* base = it.ptr;
int slab_idx = slab_index_for(owner_ss, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB)
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(owner_ss)) {
continue; // Skip invalid index
}
TinySlabMeta* smeta = &owner_ss->slabs[slab_idx]; // local name; avoids shadowing caller-scope meta
// Use per-slab class for freelist linkage (Phase 12)
tiny_next_write(smeta->class_idx, it.ptr, smeta->freelist);
smeta->freelist = it.ptr;
smeta->used--;
// Decrement SuperSlab active counter (spill returns blocks to SS)
ss_active_dec_one(owner_ss);
// Phase 8.4: Empty SuperSlab detection (will use a smeta->used scan)
// TODO: Implement scan-based empty detection
// Empty-SuperSlab detection/munmap happens via a separate flush API (kept off the hot path)
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
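// Growth example (illustrative): cap grows by 50% per spill (64 -> 96 -> 144),
// clamped to tiny_cap_max_for_class() and the TINY_TLS_MAG_CAP storage bound,
// so a class that spills often earns a larger magazine over time.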
// After spilling, retry local caches: FastCache first (≤128B); compiled out under HAKMEM_TINY_NO_FRONT_CACHE
#if !defined(HAKMEM_TINY_NO_FRONT_CACHE)
if (g_fastcache_enable && class_idx <= 4) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (fastcache_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
HAK_TP1(front_push, class_idx);
HAK_STAT_FREE(class_idx);
return;
}
}
#endif
// Then TLS SLL if room, else magazine
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll_enable && g_tls_sll[class_idx].count < sll_cap2) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
// fallback to magazine
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
HAK_STAT_FREE(class_idx);
return;
#endif // HAKMEM_BUILD_RELEASE
// Phase 7.6: TinySlab path (original)
//g_tiny_free_with_slab_count++; // Phase 7.6: Track calls - DISABLED due to segfault
// Same-thread → TLS magazine; remote-thread → MPSC stack
if (slab && pthread_equal(slab->owner_tid, tiny_self_pt())) {
int class_idx = slab->class_idx;
// Phase E1-CORRECT: C7 now has headers, can use TLS list like other classes
if (g_tls_list_enable) {
TinyTLSList* tls = &g_tls_lists[class_idx];
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
// TinyHotMag front push (8/16/32B classes; A/B slots)
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
if (hotmag_push(class_idx, base)) {
HAK_STAT_FREE(class_idx);
return;
}
}
if (tls->count < tls->cap) {
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push_fast(tls, base, class_idx);
HAK_STAT_FREE(class_idx);
return;
}
seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
tiny_tls_refresh_params(class_idx, tls);
}
{
// Phase 10: Use hak_base_ptr_t
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push_fast(tls, base, class_idx);
}
if (tls_list_should_spill(tls)) {
tls_list_spill_excess(class_idx, tls);
}
HAK_STAT_FREE(class_idx);
return;
}
tiny_mag_init_if_needed(class_idx);
TinyTLSMag* mag = &g_tls_mags[class_idx];
int cap = mag->cap;
// 32/64B: prefer the SLL (magazine-first is disabled)
// Fast path: FastCache push (preferred for ≤128B), then TLS SLL
if (g_fastcache_enable && class_idx <= 4) {
if (fastcache_push(class_idx, ptr)) {
HAK_STAT_FREE(class_idx);
return;
}
}
// Fast path: TLS SLL push (preferred)
if (!g_tls_list_enable && g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)cap);
if (g_tls_sll[class_idx].count < sll_cap) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (tls_sll_push(class_idx, base_ptr, sll_cap)) {
HAK_STAT_FREE(class_idx);
return;
}
}
}
// Next: if the magazine has room, push immediately and return (if full, bulk-transfer mag→SLL first)
if (mag->top >= cap) {
(void)bulk_mag_to_sll_if_room(class_idx, mag, cap / 2);
}
// Remote-drain can be handled opportunistically on future calls.
if (mag->top < cap) {
{
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
}
// Magazine full: before spilling, opportunistically drain remotes once under lock.
if (atomic_load_explicit(&slab->remote_count, memory_order_relaxed) >= (unsigned)g_remote_drain_thresh_per_class[class_idx] || atomic_load_explicit(&slab->remote_head, memory_order_acquire)) {
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
HAK_TP1(remote_drain, class_idx);
tiny_remote_drain_locked(slab);
pthread_mutex_unlock(lock);
}
// Spill half under class lock
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
// Profiling fix
struct timespec tss;
int ss_time = hkm_prof_begin(&tss);
pthread_mutex_lock(lock);
int spill = cap / 2;
// Phase 4.2: High-water threshold for gating Phase 4 logic
int high_water = (cap * 3) / 4; // 75% of capacity
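// Gate example (illustrative): cap = 64 gives high_water = 48; once the
// magazine is at least 75% full the next alloc is served from TLS anyway, so
// the mini-magazine hop below would be wasted work and is skipped.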
for (int i = 0; i < spill && mag->top > 0; i++) {
TinyMagItem it = mag->items[--mag->top];
// Phase 7.6: Check for SuperSlab first (mixed Magazine support)
SuperSlab* ss_owner = hak_super_lookup(it.ptr);
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
// SuperSlab spill - return to freelist
// FIX: it.ptr is BASE, use directly
void* base = it.ptr;
int slab_idx = slab_index_for(ss_owner, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB)
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) {
HAK_STAT_FREE(class_idx);
continue; // Skip invalid index
}
TinySlabMeta* smeta = &ss_owner->slabs[slab_idx]; // local name; avoids shadowing caller-scope meta
// Use per-slab class for freelist linkage (Phase 12)
tiny_next_write(smeta->class_idx, it.ptr, smeta->freelist);
smeta->freelist = it.ptr;
smeta->used--;
// Empty-SuperSlab handling is deferred to flush/background processing (kept off the hot path)
HAK_STAT_FREE(class_idx);
continue; // Skip TinySlab processing
}
TinySlab* owner =
#if HAKMEM_TINY_MAG_OWNER
it.owner;
#else
NULL;
#endif
if (!owner) {
owner = tls_active_owner_for_ptr(class_idx, it.ptr);
}
if (!owner) {
owner = hak_tiny_owner_slab(it.ptr);
}
if (!owner) continue;
// Phase 4.2: Adaptive gating - skip Phase 4 when TLS Magazine is high-water
// Rationale: When mag->top >= 75%, next alloc will come from TLS anyway
// so pushing to mini-mag is wasted work
int is_high_water = (mag->top >= high_water);
if (!is_high_water) {
// Low-water: Phase 4.1 logic (try mini-magazine first)
uint8_t cidx = owner->class_idx; // Option A: read it once
TinySlab* tls_a = g_tls_active_slab_a[cidx];
TinySlab* tls_b = g_tls_active_slab_b[cidx];
// Option B: branch prediction hint (spill returning to a TLS-active slab is the likely case)
if (__builtin_expect((owner == tls_a || owner == tls_b) &&
!mini_mag_is_full(&owner->mini_mag), 1)) {
// Fast path: return the block to the mini-magazine (no bitmap access)
mini_mag_push(&owner->mini_mag, it.ptr);
HAK_TP1(spill_tiny, cidx);
HAK_STAT_FREE(cidx);
continue; // skip the bitmap update
}
}
// High-water or Phase 4.1 mini-mag full: fall through to bitmap
// Slow path: write the bitmap directly (existing logic)
size_t bs = g_tiny_class_sizes[owner->class_idx];
int idx = ((uintptr_t)it.ptr - (uintptr_t)owner->base) / bs;
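// Index recovery (illustrative): with block size bs = 32 and
// it.ptr - owner->base = 320, idx = 320 / 32 = 10; the bitmap below is then
// tested and cleared at that index.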
if (hak_tiny_is_used(owner, idx)) {
hak_tiny_set_free(owner, idx);
int was_full = (owner->free_count == 0);
owner->free_count++;
if (was_full) move_to_free_list(owner->class_idx, owner);
if (owner->free_count == owner->total_count) {
// If this slab is TLS-active for this thread, clear the pointer before releasing
if (g_tls_active_slab_a[owner->class_idx] == owner) g_tls_active_slab_a[owner->class_idx] = NULL;
if (g_tls_active_slab_b[owner->class_idx] == owner) g_tls_active_slab_b[owner->class_idx] = NULL;
TinySlab** headp = &g_tiny_pool.free_slabs[owner->class_idx];
TinySlab* prev = NULL;
for (TinySlab* s = *headp; s; prev = s, s = s->next) {
if (s == owner) { if (prev) prev->next = s->next; else *headp = s->next; break; }
}
release_slab(owner);
}
HAK_TP1(spill_tiny, owner->class_idx);
HAK_STAT_FREE(owner->class_idx);
}
}
pthread_mutex_unlock(lock);
hkm_prof_end(ss_time, HKP_TINY_SPILL, &tss);
// Adaptive increase of cap after spill
int max_cap = tiny_cap_max_for_class(class_idx);
if (mag->cap < max_cap) {
int new_cap = mag->cap + (mag->cap / 2);
if (new_cap > max_cap) new_cap = max_cap;
if (new_cap > TINY_TLS_MAG_CAP) new_cap = TINY_TLS_MAG_CAP;
mag->cap = new_cap;
}
// Finally: prefer TinyQuickSlot → SLL → UltraFront → HotMag → Magazine (this order preserves locality)
#if !HAKMEM_BUILD_RELEASE && !defined(HAKMEM_TINY_NO_QUICK)
if (g_quick_enable && class_idx <= 4) {
TinyQuickSlot* qs = &g_tls_quick[class_idx];
if (__builtin_expect(qs->top < QUICK_CAP, 1)) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
qs->items[qs->top++] = HAK_BASE_TO_RAW(base_ptr);
} else if (g_tls_sll_enable) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll[class_idx].count < sll_cap2) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // convert user ptr to base via hak_user_to_base
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else
#endif
{
if (g_tls_sll_enable && class_idx <= 5) {
uint32_t sll_cap2 = sll_cap_for_class(class_idx, (uint32_t)mag->cap);
if (g_tls_sll[class_idx].count < sll_cap2) {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tls_sll_push(class_idx, base_ptr, sll_cap2)) {
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // convert user ptr to base via hak_user_to_base
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
} else {
// Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(base_ptr))) {
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
#if HAKMEM_TINY_MAG_OWNER
mag->items[mag->top].owner = slab;
#endif
mag->top++;
}
}
}
#if HAKMEM_DEBUG_COUNTERS
g_magazine_push_count++; // Phase 7.6: Track pushes
#endif
// Note: SuperSlab uses separate path (slab == NULL branch above)
HAK_STAT_FREE(class_idx); // Phase 3
return;
} else if (slab) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
// FIX: Use ptr_user_to_base to get correct base
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_remote_push(slab, base);
}
}