Phase 35-A: BENCH_MINIMAL gate function elimination (GO +4.39%) - tiny_front_v3_enabled() → constant true - tiny_metadata_cache_enabled() → constant 0 - learner_v7_enabled() → constant false - small_learner_v2_enabled() → constant false Phase 36: Policy snapshot init-once (GO +0.71%) - small_policy_v7_snapshot() version check skip in BENCH_MINIMAL - TLS cache for policy snapshot Phase 37: Standard TLS cache (NO-GO -0.07%) - TLS cache for Standard build attempted - Runtime gate overhead negates benefit Phase 38: FAST/OBSERVE/Standard workflow established - make perf_fast, make perf_observe targets - Scorecard and documentation updates Phase 39: Hot path gate constantization (GO +1.98%) - front_gate_unified_enabled() → constant 1 - alloc_dualhot_enabled() → constant 0 - g_bench_fast_front, g_v3_enabled blocks → compile-out - free_dispatch_stats_enabled() → constant false Results: - FAST v3: 56.04M ops/s (47.4% of mimalloc) - Standard: 53.50M ops/s (45.3% of mimalloc) - M1 target (50%): 5.5% remaining 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
700 lines
32 KiB
C++
700 lines
32 KiB
C++
#include <inttypes.h>
|
||
#include <pthread.h>
|
||
#include "tiny_remote.h"
|
||
#include "slab_handle.h"
|
||
#include "tiny_refill.h"
|
||
#include "tiny_tls_guard.h"
|
||
#include "box/free_publish_box.h"
|
||
#include "box/tls_sll_box.h" // Box TLS-SLL: C7-safe push/pop/splice
|
||
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
|
||
#include "box/tiny_header_box.h" // Header Box: Single Source of Truth for header operations
|
||
#include "box/tiny_front_config_box.h" // Phase 7-Step5: Config macros for dead code elimination
|
||
#include "tiny_region_id.h" // HEADER_MAGIC, HEADER_CLASS_MASK for freelist header restoration
|
||
#include "mid_tcache.h"
|
||
#include "front/tiny_heap_v2.h"
|
||
#include "box/ptr_type_box.h" // Phase 10: Type Safety
|
||
#if HAKMEM_TINY_SS_TLS_HINT
|
||
#include "box/tls_ss_hint_box.h" // Phase 1: TLS SuperSlab Hint Cache for Headerless mode
|
||
#endif
|
||
// Phase 3d-B: TLS Cache Merge - Unified TLS SLL structure
|
||
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
||
#if !HAKMEM_BUILD_RELEASE
|
||
#include "hakmem_tiny_magazine.h"
|
||
#endif
|
||
extern int g_tiny_force_remote;
|
||
|
||
// ENV: HAKMEM_TINY_DRAIN_TO_SLL (0 = off, which is also the default).
// At the adopt/bind boundary, splice up to N blocks from the slab freelist
// into the TLS SLL.
//
// Returns the drain budget N, clamped to [0, 256]. The environment variable is
// parsed on the first call only; the result is cached in a function-local
// static and returned unchanged by every later call.
static inline int tiny_drain_to_sll_budget(void) {
    static int v = -1;  // -1 means "not parsed yet"
    if (__builtin_expect(v == -1, 0)) {
        const char* s = getenv("HAKMEM_TINY_DRAIN_TO_SLL");
        // strtol() rather than atoi(): atoi() has undefined behavior when the
        // value does not fit in int, whereas strtol() saturates. Clamping in
        // the long domain therefore yields a defined result for any input.
        // Like atoi(), a leading numeric prefix is accepted and anything
        // non-numeric parses as 0.
        long parsed = (s && *s) ? strtol(s, NULL, 10) : 0;
        if (parsed < 0) parsed = 0;
        if (parsed > 256) parsed = 256;
        v = (int)parsed;
    }
    return v;
}
|
||
|
||
// Move blocks from one slab's freelist onto the TLS SLL of class_idx.
//
//   ss         SuperSlab that owns the slab; nothing happens unless it is
//              non-NULL and carries SUPERSLAB_MAGIC
//   slab_idx   index into ss->slabs[]; negative values are ignored
//   class_idx  size class used for next-pointer access, header restore and
//              as the TLS SLL index
//
// At most tiny_drain_to_sll_budget() blocks are moved per call (0 = feature
// off, the expected case). Draining stops early when the freelist runs empty
// or the TLS SLL refuses a push.
static inline void tiny_drain_freelist_to_sll_once(SuperSlab* ss, int slab_idx, int class_idx) {
    const int budget = tiny_drain_to_sll_budget();
    if (__builtin_expect(budget <= 0, 1)) return;
    // Phase E1-CORRECT: C7 now has headers, can use TLS SLL like other classes
    if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
    if (slab_idx < 0) return;

    TinySlabMeta* m = &ss->slabs[slab_idx];
    const uint32_t sll_capacity = 256;  // Conservative limit
    int moved = 0;

    while (m->freelist && moved < budget) {
        void* p = m->freelist;

        // CORRUPTION DEBUG: validate the freelist pointer and the current SLL
        // head before moving anything (fail-fast level >= 2 only).
        if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
            extern const size_t g_tiny_class_sizes[];
            size_t blk = g_tiny_class_sizes[class_idx];
            void* old_head_raw = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);

            if (((uintptr_t)p % blk) != 0) {
                fprintf(stderr, "[DRAIN_CORRUPT] Freelist ptr=%p misaligned (cls=%d blk=%zu offset=%zu)\n",
                        p, class_idx, blk, (size_t)((uintptr_t)p % blk));
                fprintf(stderr, "[DRAIN_CORRUPT] Attempting to drain corrupted freelist to TLS SLL!\n");
                fprintf(stderr, "[DRAIN_CORRUPT] ss=%p slab=%d moved=%d/%d\n", (void*)ss, slab_idx, moved, budget);
                abort();
            }

            if (old_head_raw && ((uintptr_t)old_head_raw % blk) != 0) {
                fprintf(stderr, "[DRAIN_CORRUPT] TLS SLL head=%p already corrupted! (cls=%d blk=%zu offset=%zu)\n",
                        old_head_raw, class_idx, blk, (size_t)((uintptr_t)old_head_raw % blk));
                fprintf(stderr, "[DRAIN_CORRUPT] Corruption detected BEFORE drain write (ptr=%p)\n", p);
                fprintf(stderr, "[DRAIN_CORRUPT] ss=%p slab=%d moved=%d/%d\n", (void*)ss, slab_idx, moved, budget);
                abort();
            }

            fprintf(stderr, "[DRAIN_TO_SLL] cls=%d ptr=%p old_head=%p moved=%d/%d\n",
                    class_idx, p, old_head_raw, moved, budget);
        }

        // Unlink p from the slab freelist (Box API for the next pointer).
        void* next = tiny_next_read(class_idx, p);
        m->freelist = next;

        // CRITICAL FIX: restore the header BEFORE pushing to the TLS SLL;
        // freelist blocks may have stale data at offset 0.
        // Uses Header Box API (C1-C6 only; C0/C7 skip).
        tiny_header_write_if_preserved(p, class_idx);

        // Phase 10: p is a BASE pointer (freelist), wrap it for the Box TLS-SLL API.
        if (!tls_sll_push(class_idx, HAK_BASE_FROM_RAW(p), sll_capacity)) {
            // The SLL refused the block (e.g. it is full). p was already
            // unlinked above, so relink it at the freelist head; otherwise it
            // would be on neither list and lost.
            tiny_next_write(class_idx, p, next);
            m->freelist = p;
            break;
        }
        moved++;
    }
}
|
||
|
||
// Walk the remote-free queue of one slab and report whether target is on it.
//
//   ss        SuperSlab whose remote_heads[] is inspected (NULL -> returns 0)
//   slab_idx  index into ss->remote_heads[] (negative -> returns 0)
//   target    node address to look for
//
// Returns 1 when target is found, and also when the walk had to be cut off
// after 8192 nodes with nodes still remaining (fail-safe: an over-long or
// cyclic queue is treated as a duplicate). Returns 0 when the whole queue was
// visited without a match.
static inline int tiny_remote_queue_contains_guard(SuperSlab* ss, int slab_idx, void* target) {
    if (!ss || slab_idx < 0) return 0;

    uintptr_t cur = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
    int limit = 8192;  // maximum number of nodes visited

    while (cur && limit > 0) {
        if ((void*)cur == target) {
            return 1;
        }
        uintptr_t next;
        if (__builtin_expect(g_remote_side_enable, 0)) {
            // Link is looked up through the side table.
            next = tiny_remote_side_get(ss, slab_idx, (void*)cur);
        } else {
            // Link is stored in the first word of the node itself.
            next = atomic_load_explicit((_Atomic uintptr_t*)cur, memory_order_relaxed);
        }
        cur = next;
        limit--;
    }

    // cur != 0 here means the budget ran out before the end of the queue.
    // Testing cur (rather than limit <= 0) avoids a false positive when the
    // queue holds exactly 8192 nodes and was in fact walked to its end.
    return cur != 0;
}
|
||
|
||
|
||
// Phase 6.12.1: Free with pre-calculated slab (Option C - avoids duplicate lookup)
|
||
// Free one Tiny block, optionally with its owning TinySlab already resolved.
//
//   ptr   pointer being freed; it is fed to ptr_user_to_base() below, i.e. it
//         is handled as a USER pointer
//   slab  owning TinySlab, or NULL to select the SuperSlab path (ptr is then
//         resolved through hak_super_lookup())
//
// SuperSlab path (slab == NULL), in order -- every step that accepts the block
// returns from the function:
//   1. resolve class_idx from per-slab metadata (silent return on failure)
//   2. optional g_tiny_safe_free cross-checks
//   3. owner mismatch              -> hak_tiny_free_superslab()
//   4. HAKMEM_TINY_FREE_TO_SS set  -> hak_tiny_free_superslab()
//   5. g_debug_fast0               -> push straight onto the slab freelist
//   6. Front-V2 heap, fast cache, then the TLS list (with spill)
//
// NOTE(review): the closing brace of this function is not in the visible text
// of this file; the body appears to continue in tiny_free_magazine.inc.h,
// which is #included right after the if/else below -- confirm before
// restructuring anything here.
void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||
// Phase 7.6: slab == NULL means SuperSlab mode (Magazine integration)
|
||
SuperSlab* ss = NULL;
|
||
TinySlabMeta* meta = NULL;
|
||
int class_idx = -1;
|
||
int slab_idx = -1;
|
||
if (!slab) {
|
||
// SuperSlab path: Get class_idx from SuperSlab
|
||
ss = hak_super_lookup(ptr);
|
||
// Not a (valid) SuperSlab pointer: the free is dropped silently.
|
||
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
|
||
// Derive class_idx from per-slab metadata instead of ss->size_class
|
||
class_idx = -1;
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr (USER) directly
|
||
slab_idx = slab_index_for(ss, ptr); // FIX: slab_index_for works better with ptr (USER) for C0/C7
|
||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
|
||
if (meta_probe->class_idx < TINY_NUM_CLASSES) {
|
||
class_idx = (int)meta_probe->class_idx;
|
||
}
|
||
}
|
||
// SuperSlab extent; only consumed by the safe-free diagnostics below.
|
||
size_t ss_size = (size_t)1ULL << ss->lg_size;
|
||
uintptr_t ss_base = (uintptr_t)ss;
|
||
// Class could not be resolved: record diagnostic code 0xFF and drop the free.
|
||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFFu, ss, (uintptr_t)class_idx);
|
||
return;
|
||
}
|
||
// Optional: cross-lookup TinySlab owner and detect class mismatch early
|
||
// Phase E1-CORRECT: All classes have headers now, standard safe_free guard
|
||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||
TinySlab* ts = hak_tiny_owner_slab(ptr);
|
||
if (ts) {
|
||
int ts_cls = ts->class_idx;
|
||
if (ts_cls >= 0 && ts_cls < TINY_NUM_CLASSES && ts_cls != class_idx) {
|
||
// Diagnostic code: 0xAA00 with the TinySlab's class in the low byte.
|
||
uint32_t code = 0xAA00u | ((uint32_t)ts_cls & 0xFFu);
|
||
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
|
||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
|
||
// Strict mode signals and abandons the free; otherwise the mismatch is
|
||
// only recorded and the free continues below.
|
||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||
}
|
||
}
|
||
}
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
|
||
// Detect cross-thread: cross-thread free MUST go via superslab path
|
||
// FIX: Use ptr (USER) for slab index calculation to handle C0/C7 boundary correctly
|
||
// base = ptr_user_to_base_blind(ptr);
|
||
// Same call with the same arguments as in the class probe above.
|
||
slab_idx = slab_index_for(ss, ptr);
|
||
int ss_cap = ss_slabs_capacity(ss);
|
||
// Out-of-range slab index: record diagnostic code 0xFE and drop the free.
|
||
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
|
||
return;
|
||
}
|
||
meta = &ss->slabs[slab_idx];
|
||
// Safe-free: the block base must sit on a block boundary inside the slab
|
||
// and its block index must be below the slab's capacity.
|
||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||
size_t blk = g_tiny_class_sizes[class_idx];
|
||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||
// Phase E1-CORRECT: All classes have headers, validate block base using known class_idx
|
||
uintptr_t delta = (uintptr_t)HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)) - (uintptr_t)slab_base;
|
||
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
||
int align_ok = (delta % blk) == 0;
|
||
int range_ok = cap_ok && (delta / blk) < meta->capacity;
|
||
if (!align_ok || !range_ok) {
|
||
// Diagnostic code 0xA100; bit 1 is set when alignment passed, bit 0 when
|
||
// the range check passed.
|
||
uint32_t code = 0xA100u;
|
||
if (align_ok) code |= 0x2u;
|
||
if (range_ok) code |= 0x1u;
|
||
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
|
||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
|
||
// The free is dropped in both modes; strict mode additionally raises SIGUSR2.
|
||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||
return;
|
||
}
|
||
}
|
||
// Ownership test compares only the low byte of the thread id.
|
||
uint32_t self_tid = tiny_self_u32();
|
||
uint8_t self_tid_low = (uint8_t)self_tid;
|
||
// Taken when the recorded owner byte differs from ours or is 0
|
||
// (presumably 0 means "no owner" -- TODO confirm).
|
||
if (__builtin_expect(meta->owner_tid_low != self_tid_low || meta->owner_tid_low == 0, 0)) {
|
||
// route directly to superslab (remote queue / freelist)
|
||
uintptr_t ptr_val = (uintptr_t)ptr;
|
||
// These two shadow the outer ss_base/ss_size and are computed the same way.
|
||
uintptr_t ss_base = (uintptr_t)ss;
|
||
size_t ss_size = (size_t)1ULL << ss->lg_size;
|
||
// ptr outside [ss_base, ss_base + ss_size): record diagnostic code 0xFD and drop.
|
||
if (__builtin_expect(ptr_val < ss_base || ptr_val >= ss_base + ss_size, 0)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFDu, ss, ptr_val);
|
||
return;
|
||
}
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_REMOTE, (uint16_t)class_idx, ss, (uintptr_t)ptr);
|
||
hak_tiny_free_superslab(ptr, ss);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
|
||
// A/B: Force SS freelist path for same-thread frees (publish on first-free)
|
||
do {
|
||
// Parsed from the environment on first use, then cached in this static.
|
||
static int g_free_to_ss2 = -1;
|
||
if (__builtin_expect(g_free_to_ss2 == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_FREE_TO_SS");
|
||
g_free_to_ss2 = (e && *e && *e != '0') ? 1 : 0; // default OFF
|
||
}
|
||
if (g_free_to_ss2) {
|
||
hak_tiny_free_superslab(ptr, ss);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
} while (0);
|
||
|
||
// Debug bypass: skip every front cache and push the block directly onto
|
||
// the slab's own freelist.
|
||
if (__builtin_expect(g_debug_fast0, 0)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||
void* prev = meta->freelist;
|
||
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
|
||
meta->freelist = base;
|
||
meta->used--;
|
||
ss_active_dec_one(ss);
|
||
// prev == NULL: the freelist was empty before this push.
|
||
if (prev == NULL) {
|
||
// Publish using the slab's class (per-slab class_idx)
|
||
ss_partial_publish(class_idx, ss);
|
||
}
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
|
||
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
|
||
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
||
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
|
||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||
if (tiny_heap_v2_try_push(class_idx, base)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
}
|
||
|
||
// Fast cache: fastcache_push() for classes 0-3 when enabled, otherwise
|
||
// tiny_fast_push(). A failed push falls through to the TLS list.
|
||
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
hak_base_ptr_t base = ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx);
|
||
int pushed = 0;
|
||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
|
||
pushed = fastcache_push(class_idx, base);
|
||
} else {
|
||
pushed = tiny_fast_push(class_idx, base);
|
||
}
|
||
if (pushed) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
}
|
||
|
||
// TLS list path; class 7 is excluded here.
|
||
if (g_tls_list_enable && class_idx != 7) {
|
||
TinyTLSList* tls = &g_tls_lists[class_idx];
|
||
// Refresh the TLS list parameters when the sequence number has moved on.
|
||
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
|
||
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
|
||
tiny_tls_refresh_params(class_idx, tls);
|
||
}
|
||
// TinyHotMag front push (8/16/32B, A/B)
|
||
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||
if (hotmag_push(class_idx, base)) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
}
|
||
// Room left in the TLS list: push and finish.
|
||
if (tls->count < tls->cap) {
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||
tiny_tls_list_guard_push(class_idx, tls, base);
|
||
tls_list_push(tls, base, class_idx);
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
// List is at capacity: re-check the parameters, push anyway, then spill
|
||
// the excess if tls_list_should_spill() asks for it.
|
||
seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_relaxed);
|
||
if (__builtin_expect(seq != g_tls_param_seen[class_idx], 0)) {
|
||
tiny_tls_refresh_params(class_idx, tls);
|
||
}
|
||
{
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||
tiny_tls_list_guard_push(class_idx, tls, base);
|
||
tls_list_push(tls, base, class_idx);
|
||
}
|
||
if (tls_list_should_spill(tls)) {
|
||
tls_list_spill_excess(class_idx, tls);
|
||
}
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 2);
|
||
HAK_STAT_FREE(class_idx);
|
||
return;
|
||
}
|
||
} else {
|
||
// Derive ss from slab (alignment) for TinySlab path
|
||
// Rounds the slab address down to a 2 MiB boundary.
|
||
ss = (SuperSlab*)((uintptr_t)slab & ~(uintptr_t)(2*1024*1024 - 1));
|
||
}
|
||
|
||
#include "tiny_free_magazine.inc.h"
|
||
// ============================================================================
|
||
// Phase 6.23: SuperSlab Allocation Helpers
|
||
// ============================================================================
|
||
|
||
// Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation)
|
||
#include "tiny_superslab_alloc.inc.h"
|
||
#include "tiny_superslab_free.inc.h"
|
||
|
||
void hak_tiny_free(void* ptr) {
|
||
#if HAKMEM_TINY_FREE_TRACE_COMPILED
|
||
static _Atomic int g_tiny_free_trace = 0;
|
||
if (atomic_fetch_add_explicit(&g_tiny_free_trace, 1, memory_order_relaxed) < 128) {
|
||
HAK_TRACE("[hak_tiny_free_enter]\n");
|
||
}
|
||
#else
|
||
(void)0; // No-op when trace compiled out
|
||
#endif
|
||
// Track total tiny free calls (diagnostics)
|
||
#if HAKMEM_TINY_FREE_CALLS_COMPILED
|
||
extern _Atomic uint64_t g_hak_tiny_free_calls;
|
||
atomic_fetch_add_explicit(&g_hak_tiny_free_calls, 1, memory_order_relaxed);
|
||
#else
|
||
(void)0; // No-op when diagnostic counter compiled out
|
||
#endif
|
||
if (!ptr || !g_tiny_initialized) return;
|
||
|
||
hak_tiny_stats_poll();
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, 0, ptr, 0);
|
||
|
||
#ifdef HAKMEM_TINY_BENCH_SLL_ONLY
|
||
// Bench-only SLL-only free: push to TLS SLL for ≤64B when possible
|
||
{
|
||
int class_idx = -1;
|
||
if (g_use_superslab) {
|
||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||
SuperSlab* ss = hak_super_lookup(ptr);
|
||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||
int sidx = slab_index_for(ss, ptr);
|
||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||
TinySlabMeta* m = &ss->slabs[sidx];
|
||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||
class_idx = (int)m->class_idx;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if (class_idx < 0) {
|
||
TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||
if (slab) class_idx = slab->class_idx;
|
||
}
|
||
if (class_idx >= 0 && class_idx <= 3) {
|
||
uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
|
||
if ((int)g_tls_sll[class_idx].count < (int)sll_cap) {
|
||
// CORRUPTION DEBUG: Validate ptr and head before TLS SLL write
|
||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||
extern const size_t g_tiny_class_sizes[];
|
||
size_t blk = g_tiny_class_sizes[class_idx];
|
||
void* old_head = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);
|
||
|
||
// Validate ptr alignment
|
||
if (((uintptr_t)ptr % blk) != 0) {
|
||
fprintf(stderr, "[FAST_FREE_CORRUPT] ptr=%p misaligned (cls=%d blk=%zu offset=%zu)\n",
|
||
ptr, class_idx, blk, (uintptr_t)ptr % blk);
|
||
fprintf(stderr, "[FAST_FREE_CORRUPT] Attempting to push corrupted pointer to TLS SLL!\n");
|
||
abort();
|
||
}
|
||
|
||
// Validate old_head alignment if not NULL
|
||
if (old_head && ((uintptr_t)old_head % blk) != 0) {
|
||
fprintf(stderr, "[FAST_FREE_CORRUPT] TLS SLL head=%p already corrupted! (cls=%d blk=%zu offset=%zu)\n",
|
||
old_head, class_idx, blk, (uintptr_t)old_head % blk);
|
||
fprintf(stderr, "[FAST_FREE_CORRUPT] Corruption detected BEFORE fast free write (ptr=%p)\n", ptr);
|
||
abort();
|
||
}
|
||
|
||
fprintf(stderr, "[FAST_FREE] cls=%d ptr=%p old_head=%p count=%u\n",
|
||
class_idx, ptr, old_head, g_tls_sll[class_idx].count);
|
||
}
|
||
|
||
// Phase 10: Convert User -> Base for TLS SLL push
|
||
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
||
if (tls_sll_push(class_idx, base_ptr, sll_cap)) {
|
||
return; // Success
|
||
}
|
||
// Fall through if push fails (SLL full or C7)
|
||
}
|
||
}
|
||
}
|
||
#endif
|
||
|
||
if (g_tiny_ultra) {
|
||
int class_idx = -1;
|
||
if (g_use_superslab) {
|
||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||
SuperSlab* ss = hak_super_lookup(ptr);
|
||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||
int sidx = slab_index_for(ss, ptr);
|
||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||
TinySlabMeta* m = &ss->slabs[sidx];
|
||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||
class_idx = (int)m->class_idx;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
if (class_idx < 0) {
|
||
TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||
if (slab) class_idx = slab->class_idx;
|
||
}
|
||
// Phase E1-CORRECT: C7 now has headers, can use TLS SLL like other classes
|
||
if (class_idx >= 0) {
|
||
// Ultra free: push directly to TLS SLL without magazine init
|
||
int sll_cap = ultra_sll_cap_for_class(class_idx);
|
||
if ((int)g_tls_sll[class_idx].count < sll_cap) {
|
||
// CORRUPTION DEBUG: Validate ptr and head before TLS SLL write
|
||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||
extern const size_t g_tiny_class_sizes[];
|
||
size_t blk = g_tiny_class_sizes[class_idx];
|
||
void* old_head = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);
|
||
|
||
// Validate ptr alignment
|
||
if (((uintptr_t)ptr % blk) != 0) {
|
||
fprintf(stderr, "[ULTRA_FREE_CORRUPT] ptr=%p misaligned (cls=%d blk=%zu offset=%zu)\n",
|
||
ptr, class_idx, blk, (uintptr_t)ptr % blk);
|
||
fprintf(stderr, "[ULTRA_FREE_CORRUPT] Attempting to push corrupted pointer to TLS SLL!\n");
|
||
abort();
|
||
}
|
||
|
||
// Validate old_head alignment if not NULL
|
||
if (old_head && ((uintptr_t)old_head % blk) != 0) {
|
||
fprintf(stderr, "[ULTRA_FREE_CORRUPT] TLS SLL head=%p already corrupted! (cls=%d blk=%zu offset=%zu)\n",
|
||
old_head, class_idx, blk, (uintptr_t)old_head % blk);
|
||
fprintf(stderr, "[ULTRA_FREE_CORRUPT] Corruption detected BEFORE ultra free write (ptr=%p)\n", ptr);
|
||
abort();
|
||
}
|
||
|
||
fprintf(stderr, "[ULTRA_FREE] cls=%d ptr=%p old_head=%p count=%u\n",
|
||
class_idx, ptr, old_head, g_tls_sll[class_idx].count);
|
||
}
|
||
|
||
// Use Box TLS-SLL API (C7-safe push)
|
||
// Note: C7 already rejected at line 334
|
||
{
|
||
// Phase 10: Convert User -> Base for TLS SLL push
|
||
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
||
if (tls_sll_push(class_idx, base_ptr, (uint32_t)sll_cap)) {
|
||
// CORRUPTION DEBUG: Verify write succeeded
|
||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||
void* base = HAK_BASE_TO_RAW(base_ptr);
|
||
void* readback = tiny_next_read(class_idx, base); // Phase E1-CORRECT: Box API
|
||
(void)readback;
|
||
void* new_head = HAK_BASE_TO_RAW(g_tls_sll[class_idx].head);
|
||
if (new_head != base) {
|
||
fprintf(stderr, "[ULTRA_FREE_CORRUPT] Write verification failed! base=%p new_head=%p\n",
|
||
base, new_head);
|
||
abort();
|
||
}
|
||
}
|
||
return; // Success
|
||
}
|
||
}
|
||
// Fall through if push fails (SLL full)
|
||
}
|
||
}
|
||
// Fallback to existing path if class resolution fails
|
||
}
|
||
|
||
SuperSlab* fast_ss = NULL;
|
||
TinySlab* fast_slab = NULL;
|
||
int fast_class_idx = -1;
|
||
if (g_use_superslab) {
|
||
// Phase 1: Try TLS hint cache first (fast path for Headerless mode)
|
||
#if HAKMEM_TINY_SS_TLS_HINT
|
||
if (!tls_ss_hint_lookup(ptr, &fast_ss)) {
|
||
#endif
|
||
fast_ss = hak_super_lookup(ptr);
|
||
#if HAKMEM_TINY_SS_TLS_HINT
|
||
}
|
||
#endif
|
||
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||
int sidx = slab_index_for(fast_ss, ptr);
|
||
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
|
||
TinySlabMeta* m = &fast_ss->slabs[sidx];
|
||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||
fast_class_idx = (int)m->class_idx;
|
||
}
|
||
}
|
||
if (fast_class_idx < 0) {
|
||
fast_ss = NULL;
|
||
}
|
||
} else {
|
||
fast_ss = NULL;
|
||
}
|
||
}
|
||
if (fast_class_idx < 0) {
|
||
fast_slab = hak_tiny_owner_slab(ptr);
|
||
if (fast_slab) fast_class_idx = fast_slab->class_idx;
|
||
}
|
||
// Safety: detect class mismatch (SS vs TinySlab) early
|
||
if (__builtin_expect(g_tiny_safe_free && fast_class_idx >= 0, 0)) {
|
||
int ss_cls = -1, ts_cls = -1;
|
||
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
|
||
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||
int sidx = slab_index_for(chk_ss, ptr);
|
||
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
|
||
TinySlabMeta* m = &chk_ss->slabs[sidx];
|
||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||
ss_cls = (int)m->class_idx;
|
||
}
|
||
}
|
||
}
|
||
TinySlab* chk_slab = fast_slab ? fast_slab : hak_tiny_owner_slab(ptr);
|
||
if (chk_slab) ts_cls = chk_slab->class_idx;
|
||
if (ss_cls >= 0 && ts_cls >= 0 && ss_cls != ts_cls) {
|
||
uintptr_t packed = ((uintptr_t)(uint16_t)ss_cls << 16) | (uint16_t)ts_cls;
|
||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)fast_class_idx, ptr, packed);
|
||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||
}
|
||
}
|
||
if (fast_class_idx >= 0) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)fast_class_idx, ptr, 1);
|
||
}
|
||
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
|
||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||
hak_base_ptr_t base2 = ptr_user_to_base(HAK_USER_FROM_RAW(ptr), fast_class_idx);
|
||
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
|
||
int pushed = 0;
|
||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && fast_class_idx <= 3, 1)) {
|
||
pushed = fastcache_push(fast_class_idx, base2);
|
||
} else {
|
||
pushed = tiny_fast_push(fast_class_idx, base2);
|
||
}
|
||
if (pushed) {
|
||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)fast_class_idx, ptr, 0);
|
||
HAK_STAT_FREE(fast_class_idx);
|
||
return;
|
||
}
|
||
}
|
||
|
||
// SuperSlab detection: prefer fast mask-based check when available
|
||
SuperSlab* ss = fast_ss;
|
||
if (!ss && g_use_superslab) {
|
||
// Phase 1: Try TLS hint cache first (fast path for Headerless mode)
|
||
#if HAKMEM_TINY_SS_TLS_HINT
|
||
if (!tls_ss_hint_lookup(ptr, &ss)) {
|
||
#endif
|
||
ss = hak_super_lookup(ptr);
|
||
#if HAKMEM_TINY_SS_TLS_HINT
|
||
}
|
||
#endif
|
||
if (!(ss && ss->magic == SUPERSLAB_MAGIC)) {
|
||
ss = NULL;
|
||
}
|
||
}
|
||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||
// Derive class from per-slab meta
|
||
int cls = -1;
|
||
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||
int sidx = slab_index_for(ss, ptr);
|
||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||
TinySlabMeta* m = &ss->slabs[sidx];
|
||
if (m->class_idx < TINY_NUM_CLASSES) {
|
||
cls = (int)m->class_idx;
|
||
}
|
||
}
|
||
if (cls < 0) {
|
||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); }
|
||
return;
|
||
}
|
||
hak_tiny_free_superslab(ptr, ss);
|
||
HAK_STAT_FREE(cls);
|
||
return;
|
||
}
|
||
|
||
// Fallback to TinySlab only when SuperSlab is not in use
|
||
TinySlab* slab = fast_slab;
|
||
if (!slab) slab = hak_tiny_owner_slab(ptr);
|
||
if (!slab) return; // Not managed by Tiny Pool
|
||
if (__builtin_expect(g_use_superslab, 0)) {
|
||
// In SS mode, a pointer that resolves only to TinySlab is suspicious → treat as invalid free
|
||
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, 0xEE, ptr, 0xF1u);
|
||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; }
|
||
return;
|
||
}
|
||
|
||
hak_tiny_free_with_slab(ptr, slab);
|
||
}
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_query.c (Phase 2B-1)
|
||
// ============================================================================
|
||
// EXTRACTED: int hak_tiny_is_managed(void* ptr) {
|
||
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
|
||
// EXTRACTED: // Phase 6.12.1: O(1) slab lookup via registry/list
|
||
// EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL || hak_super_lookup(ptr) != NULL;
|
||
// EXTRACTED: }
|
||
|
||
// Phase 7.6: Check if pointer is managed by Tiny Pool (TinySlab OR SuperSlab)
|
||
// EXTRACTED: int hak_tiny_is_managed_superslab(void* ptr) {
|
||
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Safety: Only check if g_use_superslab is enabled
|
||
// EXTRACTED: if (g_use_superslab) {
|
||
// EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr);
|
||
// EXTRACTED: // Phase 8.2 optimization: Use alignment check instead of mincore()
|
||
// EXTRACTED: // SuperSlabs are always SUPERSLAB_SIZE-aligned (2MB)
|
||
// EXTRACTED: if (ss && ((uintptr_t)ss & (SUPERSLAB_SIZE - 1)) == 0) {
|
||
// EXTRACTED: if (ss->magic == SUPERSLAB_MAGIC) {
|
||
// EXTRACTED: return 1; // Valid SuperSlab pointer
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Fallback to TinySlab check
|
||
// EXTRACTED: return hak_tiny_owner_slab(ptr) != NULL;
|
||
// EXTRACTED: }
|
||
|
||
// Return the usable size for a Tiny-managed pointer (0 if unknown/not tiny).
|
||
// Prefer SuperSlab metadata when available; otherwise use TinySlab owner class.
|
||
// EXTRACTED: size_t hak_tiny_usable_size(void* ptr) {
|
||
// EXTRACTED: if (!ptr || !g_tiny_initialized) return 0;
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Check SuperSlab first via registry (safe under direct link and LD)
|
||
// EXTRACTED: if (g_use_superslab) {
|
||
// EXTRACTED: SuperSlab* ss = hak_super_lookup(ptr);
|
||
// EXTRACTED: if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||
// EXTRACTED: int k = (int)ss->size_class;
|
||
// EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) {
|
||
// EXTRACTED: return g_tiny_class_sizes[k];
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Fallback: TinySlab owner lookup
|
||
// EXTRACTED: TinySlab* slab = hak_tiny_owner_slab(ptr);
|
||
// EXTRACTED: if (slab) {
|
||
// EXTRACTED: int k = slab->class_idx;
|
||
// EXTRACTED: if (k >= 0 && k < TINY_NUM_CLASSES) {
|
||
// EXTRACTED: return g_tiny_class_sizes[k];
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED: return 0;
|
||
// EXTRACTED: }
|
||
|
||
|
||
// ============================================================================
|
||
// Statistics and Debug Functions - Extracted to hakmem_tiny_stats.c
|
||
// ============================================================================
|
||
// (Phase 2B API headers moved to top of file)
|
||
|
||
|
||
// Optional shutdown hook to stop background components (e.g., Intelligence Engine)
|
||
void hak_tiny_shutdown(void) {
|
||
// Release TLS SuperSlab references (dec refcount) before stopping BG/INT
|
||
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
|
||
TinyTLSSlab* tls = &g_tls_slabs[k];
|
||
if (tls->ss) {
|
||
superslab_ref_dec(tls->ss);
|
||
tls->ss = NULL;
|
||
tls->meta = NULL;
|
||
tls->slab_base = NULL;
|
||
}
|
||
}
|
||
if (g_int_engine && g_int_started) {
|
||
g_int_stop = 1;
|
||
// Best-effort join; avoid deadlock if called from within the thread
|
||
if (!pthread_equal(tiny_self_pt(), g_int_thread)) {
|
||
pthread_join(g_int_thread, NULL);
|
||
}
|
||
g_int_started = 0;
|
||
g_int_engine = 0;
|
||
}
|
||
}
|
||
|
||
|
||
|
||
|
||
// Always-available: Trim empty slabs (release fully-free slabs)
|