Files
hakmem/core/tiny_superslab_free.inc.h
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

466 lines
23 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_superslab_free.inc.h - SuperSlab Free Layer
// Purpose: Same-thread and cross-thread free handling
// Extracted from: hakmem_tiny_free.inc lines 1171-1475
// Box Theory: Box 6 (Free Fast Path) + Box 2 (Remote Queue) integration
//
// Public functions:
// - hak_tiny_free_superslab(): Main SuperSlab free entry point
#include <stdatomic.h>
#include "box/ptr_type_box.h" // Phase 10
#include "box/free_remote_box.h"
#include "box/free_local_box.h"
#include "hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
// Phase 6.22-B: SuperSlab fast free path
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
// Route trace: count SuperSlab free entries (diagnostics only)
extern _Atomic uint64_t g_free_ss_enter;
atomic_fetch_add_explicit(&g_free_ss_enter, 1, memory_order_relaxed);
ROUTE_MARK(16); // free_enter
HAK_DBG_INC(g_superslab_free_count); // Phase 7.6: Track SuperSlab frees
// Phase 10: Convert USER → BASE at entry point (single conversion)
hak_user_ptr_t user_ptr = HAK_USER_FROM_RAW(ptr);
hak_base_ptr_t base_ptr = hak_user_to_base(user_ptr);
void* base = HAK_BASE_TO_RAW(base_ptr);
// Get slab index (supports 1MB/2MB SuperSlabs)
// CRITICAL: Use BASE pointer for slab_index calculation!
int slab_idx = slab_index_for(ss, base);
size_t ss_size = (size_t)1ULL << ss->lg_size;
uintptr_t ss_base = (uintptr_t)ss;
if (__builtin_expect(slab_idx < 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xBAD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xFFu, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
uint8_t cls = meta->class_idx;
// Debug: Log first C7 alloc/free for path verification
if (cls == 7) {
static _Atomic int c7_free_count = 0;
int count = atomic_fetch_add_explicit(&c7_free_count, 1, memory_order_relaxed);
if (count == 0) {
#if !HAKMEM_BUILD_RELEASE && HAKMEM_DEBUG_VERBOSE
fprintf(stderr, "[C7_FIRST_FREE] ptr=%p base=%p slab_idx=%d\n", ptr, base, slab_idx);
#endif
}
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_enter", ss, slab_idx, ptr, 0xA240u, tiny_self_u32(), 0);
#if !HAKMEM_BUILD_RELEASE
extern __thread TinyTLSSlab g_tls_slabs[];
tiny_alloc_dump_tls_state(cls, "watch_free_enter", &g_tls_slabs[cls]);
extern __thread TinyTLSMag g_tls_mags[];
TinyTLSMag* watch_mag = &g_tls_mags[cls];
fprintf(stderr,
"[REMOTE_WATCH_MAG] cls=%u mag_top=%d cap=%d\n",
cls,
watch_mag->top,
watch_mag->cap);
#endif
}
// BUGFIX (Phase 12): Validate class_idx before using as array index
if (__builtin_expect(cls >= TINY_NUM_CLASSES, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xF1, ptr, (uintptr_t)cls);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// ChatGPT Pro Optimization: Move safety checks to debug mode only
// In release builds, these checks are completely eliminated by the compiler
// Expected impact: -10~-15% CPU (eliminates O(n) duplicate scan)
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[cls];
uint8_t* slab_base_ptr = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)slab_base_ptr;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uint32_t code = 0xA100u;
if (align_ok) code |= 0x2u;
if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
// Duplicate in freelist (best-effort scan up to 64)
// NOTE: This O(n) scan is VERY expensive (can scan 64 pointers per free!)
void* scan = meta->freelist; int scanned = 0; int dup = 0;
while (scan && scanned < 64) { if (scan == base) { dup = 1; break; } scan = tiny_next_read(cls, scan); scanned++; }
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xDFu, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
#endif // !HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: C7 now has headers like other classes
// Validation must check base pointer (ptr-1) alignment, not user ptr
if (__builtin_expect(cls == 7, 0)) {
size_t blk = g_tiny_class_sizes[cls];
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)base - (uintptr_t)slab_base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uintptr_t aux = tiny_remote_pack_diag(0xA107u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
#if !HAKMEM_BUILD_RELEASE
// Debug build: Print diagnostic info before failing
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] ptr=%p base=%p slab_base=%p\n", ptr, base, (void*)slab_base);
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] delta=%zu blk=%zu delta%%blk=%zu\n",
(size_t)delta, blk, (size_t)(delta % blk));
fprintf(stderr, "[C7_ALIGN_CHECK_FAIL] align_ok=%d range_ok=%d cap=%u capacity=%u\n",
align_ok, range_ok, (unsigned)(delta / blk), (unsigned)meta->capacity);
#endif
// BUGFIX: Guard with g_tiny_safe_free_strict like other validation checks
// Fail-fast in class7 to avoid silent SLL/freelist corruption (only if strict mode enabled)
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
return;
}
}
// Header vs meta class mismatch detectorenv: HAKMEM_TINY_SLL_DIAG、初回数件のみ
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
#if !HAKMEM_BUILD_RELEASE
do {
if (__builtin_expect(HAK_ENV_TINY_SLL_DIAG(), 0)) {
uint8_t hdr = *(uint8_t*)base;
uint8_t expect = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
if (__builtin_expect(hdr != expect, 0)) {
static _Atomic uint32_t g_hdr_mismatch_log = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
if (n < 8) {
fprintf(stderr,
"[TLS_HDR_MISMATCH] cls=%u slab_idx=%d hdr=0x%02x expect=0x%02x ptr=%p\n",
(unsigned)cls, slab_idx, hdr, expect, ptr);
}
}
}
} while (0);
#endif
// Phase 6.23: Same-thread check (Phase 12: owner_tid_low)
uint32_t my_tid = tiny_self_u32();
uint8_t my_tid_low = (uint8_t)((my_tid >> 8) & 0xFFu);
const int debug_guard = g_debug_remote_guard;
static __thread int g_debug_free_count = 0;
// If owner is not set yet, claim ownership (low 8 bits) to avoid spurious remote path in 1T
if (!g_tiny_force_remote && meta->owner_tid_low == 0) {
meta->owner_tid_low = my_tid_low;
}
if (!g_tiny_force_remote && meta->owner_tid_low != 0 && meta->owner_tid_low == my_tid_low) {
ROUTE_MARK(17); // free_same_thread
// Fast path: Direct freelist push (same-thread)
if (0 && debug_guard && g_debug_free_count < 1) {
fprintf(stderr, "[FREE_SS] SAME-THREAD: owner=%u my=%u\n",
meta->owner_tid_low, my_tid);
g_debug_free_count++;
}
// Header/meta class cross-check (triage)
#if HAKMEM_TINY_HEADER_CLASSIDX
do {
uint8_t hdr_cls = tiny_region_id_read_header(ptr);
uint8_t meta_cls = meta->class_idx;
if (__builtin_expect(hdr_cls != meta_cls, 0)) {
static _Atomic uint32_t g_hdr_meta_mismatch = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch, 1, memory_order_relaxed);
if (n < 16) {
fprintf(stderr, "[SLAB_HDR_META_MISMATCH] slab_push cls_meta=%u hdr_cls=%u ptr=%p slab_idx=%d ss=%p freelist=%p used=%u\n",
(unsigned)meta_cls, (unsigned)hdr_cls, ptr, slab_idx, (void*)ss, meta->freelist, (unsigned)meta->used);
}
}
} while (0);
#endif
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x00u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "local_free_enter", my_tid);
if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, ptr, "local_free", my_tid)) {
int transitioned = tiny_free_remote_box(ss, slab_idx, meta, base_ptr, my_tid);
if (transitioned) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[cls]++;
// Free-side route: remote transition observed
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
do {
if (HAK_ENV_TINY_ROUTE_FREE()) route_free_commit((int)cls, (1ull<<18), 0xE2);
} while (0);
}
return;
}
// Optional: MidTC (TLS tcache for 128..1024B) — allow bypass via env HAKMEM_TINY_FREE_TO_SS=1
// Priority-2: Use cached ENV (eliminate lazy-init syscall overhead)
do {
if (!HAK_ENV_TINY_FREE_TO_SS()) {
int mid_cls = (int)cls;
if (midtc_enabled() && mid_cls >= 4) {
if (midtc_push(mid_cls, base)) {
// Treat as returned to TLS cache (not SS freelist)
meta->used--;
ss_active_dec_one(ss);
return;
}
}
}
} while (0);
#if !HAKMEM_BUILD_RELEASE
// DEBUG LOGGING - Track freelist operations
// Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
static __thread int free_count = 0;
if (HAK_ENV_SS_FREE_DEBUG() && (free_count++ % 1000) == 0) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_LOCAL] cls=%u slab=%d meta->used=%u (before dec)\n",
cls, slab_idx, meta->used);
}
// Perform freelist push (+first-free publish if applicable)
void* prev_before = meta->freelist;
// Phase 10: Use base_ptr
tiny_free_local_box(ss, slab_idx, meta, base_ptr, my_tid);
if (prev_before == NULL) {
ROUTE_MARK(19); // first_free_transition
extern unsigned long long g_first_free_transitions[];
g_first_free_transitions[cls]++;
ROUTE_MARK(20); // mailbox_publish
// Free-side route commit (one-shot)
do {
static int g_route_free = -1;
#if HAKMEM_BUILD_RELEASE
g_route_free = 0;
#else
// Priority-2: Use cached ENV (eliminate lazy-init static overhead)
g_route_free = HAK_ENV_TINY_ROUTE_FREE();
#endif
if (g_route_free) route_free_commit(cls, (1ull<<19) | (1ull<<20), 0xE1);
} while (0);
}
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[REMOTE_LOCAL] cls=%u slab=%d owner=%u my=%u ptr=%p prev=%p used=%u\n",
cls, slab_idx, meta->owner_tid_low, my_tid, ptr, prev_before, meta->used);
}
// 空検出は別途(ホットパス除外)
// Phase 12: slab empty → shared pool に返却
if (meta->used == 0) {
// DEBUG LOGGING - Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
if (HAK_ENV_SS_FREE_DEBUG()) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_PATH] meta->used=0 detected: cls=%u ss=%p slab_idx=%d\n",
cls, (void*)ss, slab_idx);
}
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
shared_pool_release_slab(ss, slab_idx);
}
} else {
ROUTE_MARK(18); // free_remote_transition
if (__builtin_expect(meta->owner_tid_low == my_tid_low && meta->owner_tid_low == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0xA300u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (debug_guard) {
fprintf(stderr, "[REMOTE_OWNER_ZERO] cls=%u slab=%d ptr=%p my=%u used=%u\n",
cls, slab_idx, ptr, my_tid, (unsigned)meta->used);
}
}
tiny_remote_track_expect_alloc(ss, slab_idx, ptr, "remote_free_enter", my_tid);
// Slow path: Remote free (cross-thread)
if (0 && debug_guard && g_debug_free_count < 5) {
fprintf(stderr, "[FREE_SS] CROSS-THREAD: owner=%u my=%u slab_idx=%d\n",
meta->owner_tid_low, my_tid, slab_idx);
g_debug_free_count++;
}
if (__builtin_expect(g_tiny_safe_free, 0)) {
// Best-effort duplicate scan in remote stack (up to 64 nodes)
uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
uintptr_t base_addr = ss_base;
int scanned = 0; int dup = 0;
uintptr_t cur = head;
while (cur && scanned < 64) {
if ((cur < base_addr) || (cur >= base_addr + ss_size)) {
uintptr_t aux = tiny_remote_pack_diag(0xA200u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
if ((void*)cur == base) { dup = 1; break; } // Check against BASE
if (__builtin_expect(g_remote_side_enable, 0)) {
if (!tiny_remote_sentinel_ok((void*)cur)) {
uintptr_t aux = tiny_remote_pack_diag(0xA202u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
uintptr_t observed = atomic_load_explicit((_Atomic uintptr_t*)(void*)cur, memory_order_relaxed);
tiny_remote_report_corruption("scan", (void*)cur, observed);
if (__builtin_expect(g_debug_remote_guard, 0)) {
fprintf(stderr,
"[REMOTE_SENTINEL] cls=%u slab=%d cur=%p head=%p ptr=%p scanned=%d observed=0x%016" PRIxPTR " owner=%u used=%u freelist=%p remote_head=%p\n",
cls,
slab_idx,
(void*)cur,
(void*)head,
ptr,
scanned,
observed,
meta->owner_tid_low,
(unsigned)meta->used,
meta->freelist,
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed));
}
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = tiny_remote_side_get(ss, slab_idx, (void*)cur);
} else {
if ((cur & (uintptr_t)(sizeof(void*) - 1)) != 0) {
uintptr_t aux = tiny_remote_pack_diag(0xA201u, base_addr, ss_size, cur);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, (void*)cur, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
break;
}
cur = (uintptr_t)tiny_next_read(cls, (void*)cur);
}
scanned++;
}
if (dup) {
uintptr_t aux = tiny_remote_pack_diag(0xD1u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
}
if (__builtin_expect(meta->used == 0, 0)) {
uintptr_t aux = tiny_remote_pack_diag(0x01u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
return;
}
static int g_ss_adopt_en2 = -1; // env cached
if (g_ss_adopt_en2 == -1) {
char* e = getenv("HAKMEM_TINY_SS_ADOPT");
// 既定: Remote Queueを使う1。env指定時のみ上書き。
g_ss_adopt_en2 = (e == NULL) ? 1 : ((*e != '0') ? 1 : 0);
if (__builtin_expect(debug_guard, 0)) {
fprintf(stderr, "[FREE_SS] g_ss_adopt_en2=%d (env='%s')\n", g_ss_adopt_en2, e ? e : "(null)");
}
}
// A/B gate: disable remote MPSC (use legacy freelist push)
do {
static int g_disable_remote = -1;
if (__builtin_expect(g_disable_remote == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_DISABLE_REMOTE");
g_disable_remote = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_disable_remote, 0)) {
g_ss_adopt_en2 = 0;
}
} while (0);
if (g_ss_adopt_en2) {
// Use remote queue
uintptr_t head_word = __atomic_load_n((uintptr_t*)base, __ATOMIC_RELAXED);
if (debug_guard) fprintf(stderr, "[REMOTE_PUSH_CALL] cls=%u slab=%d owner=%u my=%u ptr=%p used=%u remote_count=%u head=%p word=0x%016" PRIxPTR "\n",
cls,
slab_idx,
meta->owner_tid_low,
my_tid,
ptr,
(unsigned)meta->used,
atomic_load_explicit(&ss->remote_counts[slab_idx], memory_order_relaxed),
(void*)atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed),
head_word);
int dup_remote = tiny_remote_queue_contains_guard(ss, slab_idx, base);
if (!dup_remote && __builtin_expect(g_remote_side_enable, 0)) {
dup_remote = (head_word == TINY_REMOTE_SENTINEL) || tiny_remote_side_contains(ss, slab_idx, base);
}
if (__builtin_expect(head_word == TINY_REMOTE_SENTINEL && !dup_remote && g_debug_remote_guard, 0)) {
tiny_remote_watch_note("dup_scan_miss", ss, slab_idx, ptr, 0xA215u, my_tid, 0);
}
if (dup_remote) {
uintptr_t aux = tiny_remote_pack_diag(0xA214u, ss_base, ss_size, (uintptr_t)ptr);
tiny_remote_watch_mark(ptr, "dup_prevent", my_tid);
tiny_remote_watch_note("dup_prevent", ss, slab_idx, ptr, 0xA214u, my_tid, 0);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
tiny_failfast_abort_ptr("double_free_remote", ss, slab_idx, ptr, "remote_side_contains");
}
if (__builtin_expect(g_remote_side_enable && (head_word & 0xFFFFu) == 0x6261u, 0)) {
// TLS guard scribble detected on the node's first word → same-pointer double free across routes
uintptr_t aux = tiny_remote_pack_diag(0xA213u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)cls, ptr, aux);
tiny_remote_watch_mark(ptr, "pre_push", my_tid);
tiny_remote_watch_note("pre_push", ss, slab_idx, ptr, 0xA231u, my_tid, 0);
tiny_remote_report_corruption("pre_push", ptr, head_word);
tiny_failfast_abort_ptr("double_free_scribble", ss, slab_idx, ptr, "scribble_6261");
}
if (__builtin_expect(tiny_remote_watch_is(ptr), 0)) {
tiny_remote_watch_note("free_remote", ss, slab_idx, ptr, 0xA232u, my_tid, 0);
}
// Phase 10: Use base_ptr
int was_empty = tiny_free_remote_box(ss, slab_idx, meta, base_ptr, my_tid);
meta->used--;
// ss_active_dec_one(ss); // REMOVED: Already called inside ss_remote_push()
if (was_empty) {
extern unsigned long long g_remote_free_transitions[];
g_remote_free_transitions[cls]++;
g_remote_free_transitions[cls]++;
ss_partial_publish((int)cls, ss);
}
} else {
// Fallback: direct freelist push (legacy)
if (debug_guard) fprintf(stderr, "[FREE_SS] Using LEGACY freelist push (not remote queue)\n");
void* prev = meta->freelist;
tiny_next_write(cls, base, prev);
meta->freelist = base;
tiny_failfast_log("free_local_legacy", cls, ss, meta, ptr, prev);
// Priority-2: Use cached ENV (eliminate lazy-init static overhead)
do {
#if !HAKMEM_BUILD_RELEASE
int g_mask_en = (HAK_ENV_TINY_FREELIST_MASK() != 0) ? 1 : 0;
#else
int g_mask_en = 0;
#endif
if (__builtin_expect(g_mask_en, 0) && prev == NULL) {
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
}
} while (0);
meta->used--;
ss_active_dec_one(ss);
if (prev == NULL) {
ss_partial_publish((int)cls, ss);
}
}
// 空検出は別途(ホットパス除外)
// Phase 12: slab empty → shared pool に返却
if (meta->used == 0) {
// DEBUG LOGGING - Priority-2: Use cached ENV (eliminate lazy-init TLS overhead)
#if !HAKMEM_BUILD_RELEASE
if (HAK_ENV_SS_FREE_DEBUG()) {
#else
if (0) {
#endif
fprintf(stderr, "[FREE_PATH] meta->used=0 detected: cls=%u ss=%p slab_idx=%d\n",
cls, (void*)ss, slab_idx);
}
extern void shared_pool_release_slab(SuperSlab* ss, int slab_idx);
shared_pool_release_slab(ss, slab_idx);
}
}
}