Files
hakmem/core/front/malloc_tiny_fast.h
2025-12-15 12:29:27 +09:00

1067 lines
48 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// malloc_tiny_fast.h - Phase 26: Front Gate Unification (Tiny Fast Path)
//
// Goal: Eliminate 3-layer overhead (malloc → hak_alloc_at → wrapper → tiny_alloc_fast)
// Target: +10-15% performance (11.35M → 12.5-13.5M ops/s)
//
// Design (ChatGPT analysis):
// - Replace: malloc → hak_alloc_at (236 lines) → wrapper (diagnostics) → tiny_alloc_fast
// - With: malloc → malloc_tiny_fast (single-layer, direct to Unified Cache)
// - Preserves: Safety checks (lock depth, initializing, LD_SAFE, jemalloc block)
// - Leverages: Phase 23 Unified Cache (tcache-style, 2-3 cache misses)
//
// Performance:
// - Current overhead: malloc(8.97%) + routing + wrapper(3.63%) + tiny(5.37%) = 17.97%
// - BenchFast ceiling: 8-10 instructions (~1-2% overhead)
// - Gap: ~16%
// - Target: Close half the gap (+10-15% improvement)
//
// ENV Variables:
// HAKMEM_FRONT_GATE_UNIFIED=0 # Disable Front Gate Unification (default: 1, ON — see front_gate_unified_enabled())
#ifndef HAK_FRONT_MALLOC_TINY_FAST_H
#define HAK_FRONT_MALLOC_TINY_FAST_H
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
#include <pthread.h> // For pthread_self() in cross-thread check
#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
#include "../hakmem_super_registry.h" // For cross-thread owner check
#include "../superslab/superslab_inline.h" // For ss_fast_lookup, slab_index_for (Phase 12)
#include "../box/ss_slab_meta_box.h" // For ss_slab_meta_owner_tid_low_get
#include "../box/free_remote_box.h" // For tiny_free_remote_box
#include "tiny_unified_cache.h" // For unified_cache_pop_or_refill
#include "../tiny_region_id.h" // For tiny_region_id_write_header
#include "../hakmem_tiny.h" // For hak_tiny_size_to_class
#include "../box/tiny_env_box.h" // For tiny_env_cfg() (ENV variables)
#include "../box/tiny_front_hot_box.h" // Phase 4-Step2: Hot Path Box
#include "../box/tiny_front_cold_box.h" // Phase 4-Step2: Cold Path Box
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
#include "../box/tiny_hotheap_v2_box.h" // TinyHotHeap v2 (Phase31 A/B)
#include "../box/smallobject_hotbox_v3_box.h" // SmallObject HotHeap v3 skeleton
#include "../box/smallobject_hotbox_v4_box.h" // SmallObject HotHeap v4 (C7 stub)
#include "../box/smallobject_hotbox_v5_box.h" // SmallObject HotHeap v5 (C6-only route stub, Phase v5-1)
#include "../box/smallobject_core_v6_box.h" // SmallObject Core v6 (Phase V6-HDR-2)
#include "../box/smallobject_v6_env_box.h" // SmallObject v6 ENV control (Phase V6-HDR-2)
#include "../box/smallobject_hotbox_v7_box.h" // SmallObject HotBox v7 stub (Phase v7-1)
#include "../box/smallobject_policy_v7_box.h" // Phase v7-4: Policy Box
#include "../box/tiny_static_route_box.h" // Phase 3 C3: Static routing (policy snapshot bypass)
#include "../box/smallobject_mid_v35_box.h" // Phase v11a-3: MID v3.5 HotBox
#include "../box/tiny_c7_ultra_box.h" // C7 ULTRA stub (UF-1, delegates to v3)
#include "../box/tiny_c6_ultra_free_box.h" // Phase 4-2: C6 ULTRA-free (free-only, C6-only)
#include "../box/tiny_c5_ultra_free_box.h" // Phase 5-1/5-2: C5 ULTRA-free + alloc integration
#include "../box/tiny_c4_ultra_free_box.h" // Phase 6: C4 ULTRA-free + alloc integration (cap=64)
#include "../box/tiny_ultra_tls_box.h" // Phase TLS-UNIFY-1: Unified ULTRA TLS API
#include "../box/tiny_ultra_classes_box.h" // Phase REFACTOR-1: Named constants for C4-C7
#include "../box/tiny_legacy_fallback_box.h" // Phase REFACTOR-2: Legacy fallback logic unification
#include "../box/tiny_ptr_convert_box.h" // Phase REFACTOR-3: Inline pointer macro centralization
#include "../box/tiny_front_v3_env_box.h" // Tiny front v3 snapshot gate
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
#include "../box/free_path_stats_box.h" // Phase FREE-LEGACY-BREAKDOWN-1: Free path stats
#include "../box/alloc_gate_stats_box.h" // Phase ALLOC-GATE-OPT-1: Alloc gate stats
#include "../box/free_policy_fast_v2_box.h" // Phase POLICY-FAST-PATH-V2: Policy snapshot bypass
#include "../box/free_tiny_fast_hotcold_env_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: ENV control
#include "../box/free_tiny_fast_hotcold_stats_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Stats
#include "../box/tiny_metadata_cache_hot_box.h" // Phase 3 C2: Policy hot cache (metadata cache optimization)
#include "../box/tiny_free_route_cache_env_box.h" // Phase 3 D1: Free path route cache
#include "../box/hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation
#include "../box/free_cold_shape_env_box.h" // Phase 5 E5-3a: Free cold path shape optimization
#include "../box/free_cold_shape_stats_box.h" // Phase 5 E5-3a: Free cold shape stats
#include "../box/free_tiny_fast_mono_dualhot_env_box.h" // Phase 9: MONO DUALHOT ENV gate
#include "../box/free_tiny_fast_mono_legacy_direct_env_box.h" // Phase 10: MONO LEGACY DIRECT ENV gate
// Helper: current thread id (low 32 bits) for owner check
#ifndef TINY_SELF_U32_LOCAL_DEFINED
#define TINY_SELF_U32_LOCAL_DEFINED
/* Return the low 32 bits of the calling thread's pthread id.
 * Used as a cheap per-thread tag for slab owner-tid comparisons. */
static inline uint32_t tiny_self_u32_local(void) {
    uintptr_t tid_bits = (uintptr_t)pthread_self();
    return (uint32_t)tid_bits;
}
#endif
// ============================================================================
// ENV Control (cached, lazy init)
// ============================================================================
// Enable flag (default: 1, ON; set HAKMEM_FRONT_GATE_UNIFIED=0 to disable).
// NOTE: the result is cached in a function-local static on first call, so
// later changes to the environment variable have no effect.
// (Comment fix: earlier doc claimed "default: 0, OFF", contradicting the code.)
static inline int front_gate_unified_enabled(void) {
    static int g_enable = -1;
    if (__builtin_expect(g_enable == -1, 0)) {
        const char* e = getenv("HAKMEM_FRONT_GATE_UNIFIED");
        // Disabled only by an explicit leading '0'; unset or any other value means ON.
        // (Dropped the redundant "*e &&" — '*e == '0'' already implies *e != 0.)
        g_enable = (e && *e == '0') ? 0 : 1; // default ON
#if !HAKMEM_BUILD_RELEASE
        if (g_enable) {
            fprintf(stderr, "[FrontGate-INIT] front_gate_unified_enabled() = %d\n", g_enable);
            fflush(stderr);
        }
#endif
    }
    return g_enable;
}
// ============================================================================
// Phase REFACTOR-2: Legacy free helper (unified in tiny_legacy_fallback_box.h)
// ============================================================================
// Legacy free handling is encapsulated in tiny_legacy_fallback_box.h
// (Removed inline implementation to avoid duplication)
// ============================================================================
// Phase 4-Step2: malloc_tiny_fast() - Hot/Cold Path Box (ACTIVE)
// ============================================================================
// Ultra-thin Tiny allocation using Hot/Cold Path Box (Phase 4-Step2)
//
// IMPROVEMENTS over Phase 26-A:
// - Branch reduction: Hot path has only 1 branch (cache empty check)
// - Branch hints: TINY_HOT_LIKELY/UNLIKELY for better CPU prediction
// - Hot/Cold separation: Keeps hot path small (better i-cache locality)
// - Explicit fallback: Clear hot→cold transition
//
// PERFORMANCE:
// - Baseline (Phase 26-A, no PGO): 53.3 M ops/s
// - Hot/Cold Box (no PGO): 57.2 M ops/s (+7.3%)
//
// DESIGN:
// 1. size → class_idx (same as Phase 26-A)
// 2. Hot path: tiny_hot_alloc_fast() - cache hit (1 branch)
// 3. Cold path: tiny_cold_refill_and_alloc() - cache miss (noinline, cold)
//
// Preconditions:
// - Called AFTER malloc() safety checks (lock depth, initializing, LD_SAFE)
// - size <= tiny_get_max_size() (caller verified)
// Returns:
// - USER pointer on success
// - NULL on failure (caller falls back to normal path)
//
// Phase ALLOC-TINY-FAST-DUALHOT-2: Probe window ENV gate (safe from early putenv)
//
// Returns 1 when HAKMEM_TINY_ALLOC_DUALHOT is set to a non-'0' value, else 0.
// Because callers may putenv() very early, an UNSET variable is not committed
// immediately: for the first 64 calls we report 0 without caching, so a late
// putenv can still enable the gate.
//
// BUGFIX: previously, an explicitly set "0" (or empty) value during the probe
// window matched neither commit branch and the function returned the sentinel
// -1, which callers (`if (alloc_dualhot_enabled())`) treated as ENABLED.
// Now any observed value commits immediately.
static inline int alloc_dualhot_enabled(void) {
    static int g = -1;
    static int g_probe_left = 64; // Probe window: tolerate early putenv before gate init
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_ALLOC_DUALHOT");
        if (e != NULL) {
            // Variable present: commit now (non-'0' => ON, '0' or empty => OFF).
            g = (*e && *e != '0') ? 1 : 0;
        } else if (g_probe_left > 0) {
            g_probe_left--;
            return 0; // Env not set (yet): keep probing without committing
        } else {
            g = 0; // Probe window exhausted, commit to 0
        }
    }
    return g;
}
// Phase 2 B3: tiny_alloc_route_cold() - Handle rare routes (V7, MID, ULTRA)
// NOTE: noinline + cold to keep the hot path small; any miss on a specialized
// route falls through to the LEGACY unified cache hot/cold path.
__attribute__((noinline, cold))
static void* tiny_alloc_route_cold(SmallRouteKind route_kind, int class_idx, size_t size) {
    void* p = NULL;
    switch (route_kind) {
    case SMALL_ROUTE_ULTRA:
        // Phase TLS-UNIFY-1: Unified ULTRA TLS pop for C4-C6 (C7 handled above)
        p = tiny_ultra_tls_pop((uint8_t)class_idx);
        if (TINY_HOT_LIKELY(p != NULL)) {
            switch (class_idx) {
            case 6: FREE_PATH_STAT_INC(c6_ultra_alloc_hit); break;
            case 5: FREE_PATH_STAT_INC(c5_ultra_alloc_hit); break;
            case 4: FREE_PATH_STAT_INC(c4_ultra_alloc_hit); break;
            default: break;
            }
            return tiny_base_to_user_inline(p);
        }
        break; // ULTRA miss → fallback to LEGACY
    case SMALL_ROUTE_MID_V35:
    case SMALL_ROUTE_MID_V3:
        // Phase v11a-3 / Phase MID-V3: both MID generations share the v3.5
        // segment infrastructure, so one allocator call serves either route.
        p = small_mid_v35_alloc(class_idx, size);
        if (TINY_HOT_LIKELY(p != NULL)) {
            return p;
        }
        break; // MID miss → fallback to LEGACY
    case SMALL_ROUTE_V7:
        // Phase v7: SmallObject v7 allocation (research box)
        p = small_heap_alloc_fast_v7_stub(size, (uint8_t)class_idx);
        if (TINY_HOT_LIKELY(p != NULL)) {
            return p;
        }
        break; // V7 miss → fallback to LEGACY
    case SMALL_ROUTE_LEGACY:
    default:
        break;
    }
    // Fallback: LEGACY unified cache hot/cold path
    void* legacy = tiny_hot_alloc_fast(class_idx);
    if (TINY_HOT_LIKELY(legacy != NULL)) {
        return legacy;
    }
    return tiny_cold_refill_and_alloc(class_idx);
}
// Phase ALLOC-GATE-SSOT-1: malloc_tiny_fast_for_class() - body (class_idx already known)
//
// Allocation routing body for a pre-validated Tiny class:
//   1. Per-class stats
//   2. C7 ULTRA early-exit (skips policy snapshot on the most common path)
//   3. C0-C3 DUALHOT direct path (ENV gated)
//   4. Route selection (static route table / policy hot cache)
//   5. Route dispatch (B3 shape with cold helper, or inline legacy switch)
// Preconditions: 0 <= class_idx < TINY_NUM_CLASSES (validated by the caller).
// Returns a USER pointer on success, or NULL (caller falls back to normal path).
__attribute__((always_inline))
static inline void* malloc_tiny_fast_for_class(size_t size, int class_idx) {
    // Stats (class_idx already validated by gate)
    tiny_front_alloc_stat_inc(class_idx);
    ALLOC_GATE_STAT_INC_CLASS(class_idx);
    // Phase v11a-5b: C7 ULTRA early-exit (skip policy snapshot for common case)
    // This is the most common hot path - avoids TLS policy overhead
    // Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
    // Phase 19-3a: Remove UNLIKELY hint (snapshot is ON by default in presets, hint is backwards)
    bool c7_ultra_on;
    if (hakmem_env_snapshot_enabled()) {
        const HakmemEnvSnapshot* env = hakmem_env_snapshot();
        c7_ultra_on = env->tiny_c7_ultra_enabled;
    } else {
        c7_ultra_on = tiny_c7_ultra_enabled_env();
    }
    if (class_idx == 7 && c7_ultra_on) {
        void* ultra_p = tiny_c7_ultra_alloc(size);
        if (TINY_HOT_LIKELY(ultra_p != NULL)) {
            return ultra_p;
        }
        // C7 ULTRA miss → fall through to policy-based routing
    }
    // Phase ALLOC-TINY-FAST-DUALHOT-2: C0-C3 direct path (second hot path)
    // Skip expensive policy snapshot and route determination for C0-C3.
    // NOTE: Branch only taken if class_idx <= 3 (rare when OFF, frequent when ON)
    if ((unsigned)class_idx <= 3u) {
        if (alloc_dualhot_enabled()) {
            // Direct to LEGACY unified cache (no policy snapshot)
            void* ptr = tiny_hot_alloc_fast(class_idx);
            if (TINY_HOT_LIKELY(ptr != NULL)) {
                return ptr;
            }
            return tiny_cold_refill_and_alloc(class_idx);
        }
    }
    // 2. Route selection: Static route table (Phase 3 C3) or policy hot cache (Phase 3 C2) or policy snapshot (default)
    SmallRouteKind route_kind;
    if (tiny_static_route_ready_fast()) {
        route_kind = tiny_static_route_get_kind_fast(class_idx);
    } else {
        // Phase 3 C2: Use policy hot cache if enabled (eliminates policy_snapshot() call)
        route_kind = tiny_policy_hot_get_route(class_idx);
    }
    // Phase 2 B3: Routing dispatch (ENV gate HAKMEM_TINY_ALLOC_ROUTE_SHAPE)
    // Optimized: LIKELY on LEGACY (common case), cold helper for rare routes
    const tiny_env_cfg_t* env_cfg = tiny_env_cfg();
    if (TINY_HOT_LIKELY(env_cfg->alloc_route_shape)) {
        // B3 optimized: Prioritize LEGACY with LIKELY hint
        if (TINY_HOT_LIKELY(route_kind == SMALL_ROUTE_LEGACY)) {
            // Phase 3 C1: TLS cache prefetch (prefetch g_unified_cache[class_idx] to L1)
            if (__builtin_expect(env_cfg->tiny_prefetch, 0)) {
                __builtin_prefetch(&g_unified_cache[class_idx], 0, 3);
            }
            // LEGACY fast path: Unified Cache hot/cold
            void* ptr = tiny_hot_alloc_fast(class_idx);
            if (TINY_HOT_LIKELY(ptr != NULL)) {
                return ptr;
            }
            return tiny_cold_refill_and_alloc(class_idx);
        }
        // Rare routes: delegate to cold helper
        return tiny_alloc_route_cold(route_kind, class_idx, size);
    }
    // Original dispatch (backward compatible, default)
    // 3. Single switch on route_kind (all ENV checks moved to Policy init)
    // NOTE(review): this switch duplicates tiny_alloc_route_cold() inline
    // (presumably to keep the default path call-free and allow the prefetch
    // below) — keep the two in sync when adding routes.
    switch (route_kind) {
        case SMALL_ROUTE_ULTRA: {
            // Phase TLS-UNIFY-1: Unified ULTRA TLS pop for C4-C6 (C7 handled above)
            void* base = tiny_ultra_tls_pop((uint8_t)class_idx);
            if (TINY_HOT_LIKELY(base != NULL)) {
                if (class_idx == 6) FREE_PATH_STAT_INC(c6_ultra_alloc_hit);
                else if (class_idx == 5) FREE_PATH_STAT_INC(c5_ultra_alloc_hit);
                else if (class_idx == 4) FREE_PATH_STAT_INC(c4_ultra_alloc_hit);
                return tiny_base_to_user_inline(base);
            }
            // ULTRA miss → fallback to LEGACY
            break;
        }
        case SMALL_ROUTE_MID_V35: {
            // Phase v11a-3: MID v3.5 allocation
            void* v35p = small_mid_v35_alloc(class_idx, size);
            if (TINY_HOT_LIKELY(v35p != NULL)) {
                return v35p;
            }
            // MID v3.5 miss → fallback to LEGACY
            break;
        }
        case SMALL_ROUTE_V7: {
            // Phase v7: SmallObject v7 allocation (research box)
            void* v7p = small_heap_alloc_fast_v7_stub(size, (uint8_t)class_idx);
            if (TINY_HOT_LIKELY(v7p != NULL)) {
                return v7p;
            }
            // V7 miss → fallback to LEGACY
            break;
        }
        case SMALL_ROUTE_MID_V3: {
            // Phase MID-V3: MID v3 allocation (257-768B, C5-C6)
            // Note: MID v3 uses same segment infrastructure as MID v3.5
            // For now, delegate to MID v3.5 which handles both
            void* v3p = small_mid_v35_alloc(class_idx, size);
            if (TINY_HOT_LIKELY(v3p != NULL)) {
                return v3p;
            }
            break;
        }
        case SMALL_ROUTE_LEGACY:
        default:
            break;
    }
    // Phase 3 C1: TLS cache prefetch (prefetch g_unified_cache[class_idx] to L1)
    if (__builtin_expect(env_cfg->tiny_prefetch, 0)) {
        __builtin_prefetch(&g_unified_cache[class_idx], 0, 3);
    }
    // LEGACY fallback: Unified Cache hot/cold path
    void* ptr = tiny_hot_alloc_fast(class_idx);
    if (TINY_HOT_LIKELY(ptr != NULL)) {
        return ptr;
    }
    return tiny_cold_refill_and_alloc(class_idx);
}
// Wrapper: size → class_idx conversion (SSOT)
// Maps the request size to a Tiny class exactly once, then delegates to
// malloc_tiny_fast_for_class(). Returns NULL when size has no Tiny class
// (caller falls back to the normal allocation path).
__attribute__((always_inline))
static inline void* malloc_tiny_fast(size_t size) {
    // Phase ALLOC-GATE-OPT-1: counter scatter (1. function entry)
    ALLOC_GATE_STAT_INC(total_calls);
    // Phase ALLOC-GATE-SSOT-1: Single size→class conversion (SSOT)
    ALLOC_GATE_STAT_INC(size_to_class_calls);
    const int cls = hak_tiny_size_to_class(size);
    // Unsigned compare folds the (cls < 0 || cls >= N) range check into one test.
    if (__builtin_expect((unsigned)cls >= (unsigned)TINY_NUM_CLASSES, 0)) {
        return NULL;
    }
    // Delegate to *_for_class (stats tracked inside)
    return malloc_tiny_fast_for_class(size, cls);
}
// ============================================================================
// Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split helpers
// ============================================================================
// Cold path: Cross-thread free, TinyHeap routes, and legacy fallback
// (noinline,cold to keep hot path small and I-cache clean)
//
// Parameters:
//   ptr       - USER pointer being freed (Tiny header already validated by caller)
//   base      - BASE pointer (ptr minus header) computed by the hot path
//   class_idx - Tiny class index decoded from the header
// Returns:
//   1 when the free was handled here (remote queue, TinyHeap route, or legacy push)
//   0 when the pointer is not ours / remote push failed → caller uses normal free
__attribute__((noinline,cold))
static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
{
    FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_hit);
    // Phase 3 D1: Free path route cache (eliminate tiny_route_for_class overhead)
    tiny_route_kind_t route;
    if (__builtin_expect(tiny_free_static_route_enabled(), 0)) {
        // Use cached route (bypasses tiny_route_for_class())
        route = g_tiny_route_class[(unsigned)class_idx & 7u];
        if (__builtin_expect(route == TINY_ROUTE_LEGACY && !g_tiny_route_snapshot_done, 0)) {
            // Fallback if uninitialized
            route = tiny_route_for_class((uint8_t)class_idx);
        }
    } else {
        // Standard path
        route = tiny_route_for_class((uint8_t)class_idx);
    }
    const int use_tiny_heap = tiny_route_is_heap_kind(route);
    // Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
    // Phase 19-3a: Remove UNLIKELY hint (snapshot is ON by default in presets, hint is backwards)
    // NOTE(review): front_snap is assigned but never read below in this
    // function — possibly dead code, or kept for the lazy-init side effect of
    // tiny_front_v3_snapshot_get(); confirm before removing.
    const TinyFrontV3Snapshot* front_snap;
    if (hakmem_env_snapshot_enabled()) {
        const HakmemEnvSnapshot* env = hakmem_env_snapshot();
        front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL;
    } else {
        front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
    }
    // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
    // In Release builds, we trust header magic (0xA0) as sufficient validation.
#if !HAKMEM_BUILD_RELEASE
    // Verify SuperSlab registration (guards against misclassifying foreign pointers)
    SuperSlab* ss_guard = hak_super_lookup(ptr);
    if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) {
        return 0; // Not managed by hakmem → defer to the normal free path
    }
#endif // !HAKMEM_BUILD_RELEASE
    // Phase 5 E5-3a: Optimized cold path shape
    // Strategy: Handle common LEGACY path first (use_tiny_heap==0 in Mixed ~90%+)
    // Defer expensive LARSON/cross-thread checks to only when heap routing needed
    static __thread int g_cold_shape = -1;
    if (__builtin_expect(g_cold_shape == -1, 0)) {
        g_cold_shape = free_cold_shape_enabled() ? 1 : 0;
    }
    if (g_cold_shape == 1) {
        // Optimized shape: Check use_tiny_heap FIRST
        if (__builtin_expect(!use_tiny_heap, 1)) {
            // Most common case in Mixed: LEGACY path, no heap routing
            // Skip LARSON/cross-thread check entirely (not needed for legacy)
            FREE_COLD_SHAPE_STAT_INC(legacy_fast);
            FREE_COLD_SHAPE_STAT_INC(enabled_count);
            goto legacy_fallback;
        }
        // Rare: heap routing needed, do full validation
        FREE_COLD_SHAPE_STAT_INC(heap_path);
    }
    // Baseline shape: LARSON check first (current behavior)
    // Cross-thread free detection (Larson MT crash fix, ENV gated) + TinyHeap free path
    {
        static __thread int g_larson_fix = -1;
        if (__builtin_expect(g_larson_fix == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
            g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
#if !HAKMEM_BUILD_RELEASE
            fprintf(stderr, "[LARSON_FIX_INIT] g_larson_fix=%d (env=%s)\n", g_larson_fix, e ? e : "NULL");
            fflush(stderr);
#endif
        }
        if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
            // Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
            SuperSlab* ss = ss_fast_lookup(base);
            // Phase FREE-LEGACY-BREAKDOWN-1: counter scatter (5. super_lookup call)
            FREE_PATH_STAT_INC(super_lookup_called);
            if (ss) {
                int slab_idx = slab_index_for(ss, base);
                if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
                    uint32_t self_tid = tiny_self_u32_local();
                    uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
                    TinySlabMeta* meta = &ss->slabs[slab_idx];
                    // LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
                    uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
#if !HAKMEM_BUILD_RELEASE
                    static _Atomic uint64_t g_owner_check_count = 0;
                    uint64_t oc = atomic_fetch_add(&g_owner_check_count, 1);
                    if (oc < 10) {
                        fprintf(stderr, "[LARSON_FIX] Owner check: ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x match=%d\n",
                                ptr, owner_tid_low, self_tid_cmp, self_tid, (owner_tid_low == self_tid_cmp));
                        fflush(stderr);
                    }
#endif
                    if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
                        // Cross-thread free → route to remote queue instead of poisoning TLS cache
                        FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_cross_thread);
#if !HAKMEM_BUILD_RELEASE
                        static _Atomic uint64_t g_cross_thread_count = 0;
                        uint64_t ct = atomic_fetch_add(&g_cross_thread_count, 1);
                        if (ct < 20) {
                            fprintf(stderr, "[LARSON_FIX] Cross-thread free detected! ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x\n",
                                    ptr, owner_tid_low, self_tid_cmp, self_tid);
                            fflush(stderr);
                        }
#endif
                        if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
                            // Phase FREE-LEGACY-BREAKDOWN-1: counter scatter (6. cross-thread free)
                            FREE_PATH_STAT_INC(remote_free);
                            return 1; // handled via remote queue
                        }
                        return 0; // remote push failed; fall back to normal path
                    }
                    // Same-thread + TinyHeap route → route-based free
                    if (__builtin_expect(use_tiny_heap, 0)) {
                        FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_tinyheap);
                        switch (route) {
                            case TINY_ROUTE_SMALL_HEAP_V7: {
                                // Phase v7-1: C6-only v7 stub (MID v3 fallback)
                                if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
                                    return 1;
                                }
                                break; // fallthrough to legacy
                            }
                            case TINY_ROUTE_SMALL_HEAP_V6: {
                                // Phase V6-HDR-2: Headerless free (ENV gated)
                                if (small_v6_headerless_route_enabled((uint8_t)class_idx)) {
                                    SmallHeapCtxV6* ctx_v6 = small_heap_ctx_v6();
                                    if (small_v6_headerless_free(ctx_v6, ptr, (uint8_t)class_idx)) {
                                        return 1; // Handled by v6
                                    }
                                    // v6 returned false -> fallback to legacy
                                }
                                break; // fallthrough to legacy
                            }
                            // Phase v10: v3/v4/v5 removed - routes now handled as LEGACY
                            case TINY_ROUTE_HOTHEAP_V2:
                                tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
                                // Phase FREE-LEGACY-BREAKDOWN-1: counter scatter (v2 counted as tiny_heap_v1)
                                FREE_PATH_STAT_INC(tiny_heap_v1_fast);
                                return 1;
                            case TINY_ROUTE_HEAP: {
                                tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
                                if (class_idx == 7) {
                                    tiny_c7_free_fast_with_meta(ss, slab_idx, base);
                                } else {
                                    tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
                                }
                                // Phase FREE-LEGACY-BREAKDOWN-1: counter scatter (9. TinyHeap v1 route)
                                FREE_PATH_STAT_INC(tiny_heap_v1_fast);
                                return 1;
                            }
                            default:
                                break;
                        }
                    }
                }
            }
            if (use_tiny_heap) {
                // fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
                if (route == TINY_ROUTE_HOTHEAP_V2) {
                    tiny_hotheap_v2_record_free_fallback((uint8_t)class_idx);
                }
                // Phase v10: v3/v4 removed - no special fallback
                tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
                return 1;
            }
        }
    }
    // Debug: Log free operations (first 5000, all classes)
#if !HAKMEM_BUILD_RELEASE
    {
        extern _Atomic uint64_t g_debug_op_count;
        extern __thread TinyTLSSLL g_tls_sll[];
        uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
        // Note: Shares g_debug_op_count with alloc logging, so bump the window.
        if (op < 5000) {
            fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p from=free_tiny_fast_cold tls_count_before=%u\n",
                    (unsigned long)op, class_idx, ptr, base,
                    g_tls_sll[class_idx].count);
            fflush(stderr);
        }
    }
#endif
    // Phase REFACTOR-2: Legacy fallback (use unified helper)
legacy_fallback:
    FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_legacy_fallback);
    tiny_legacy_fallback_free_base(base, class_idx);
    return 1;
}
// Hot path: Fast-path validation + ULTRA/MID/V7 routes
// (always_inline to minimize overhead on critical path)
//
// Returns:
//   1 when the free was fully handled by a fast route
//   0 when ptr is NULL / not a Tiny header / invalid class → normal free path
__attribute__((always_inline))
static inline int free_tiny_fast_hot(void* ptr) {
    if (__builtin_expect(!ptr, 0)) {
        FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_null_ptr);
        return 0;
    }
#if HAKMEM_TINY_HEADER_CLASSIDX
    // 1. Page-boundary guard:
    //    If ptr sits at the start of a page (offset==0), ptr-1 may be in a
    //    different page or an unmapped region. In that case skip the header
    //    read and fall back to the normal free path.
    uintptr_t off = (uintptr_t)ptr & 0xFFFu;
    if (__builtin_expect(off == 0, 0)) {
        FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_page_boundary);
        return 0;
    }
    // 2. Fast header magic validation (mandatory)
    //    Release builds omit the magic check inside tiny_region_id_read_header(),
    //    so validate the Tiny-specific header (0xA0) ourselves here.
    uint8_t* header_ptr = (uint8_t*)ptr - 1;
    uint8_t header = *header_ptr;
    uint8_t magic = header & 0xF0u;
    if (__builtin_expect(magic != HEADER_MAGIC, 0)) {
        // Not a Tiny header → Mid/Large/external pointer, use the normal free path
        FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_bad_magic);
        return 0;
    }
    // 3. Extract class_idx from the low 4 bits
    int class_idx = (int)(header & HEADER_CLASS_MASK);
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_bad_class);
        return 0;
    }
    // 4. Compute BASE and push to the Unified Cache
    void* base = tiny_user_to_base_inline(ptr);
    tiny_front_free_stat_inc(class_idx);
    // Phase FREE-LEGACY-BREAKDOWN-1: counter scatter (1. function entry)
    FREE_PATH_STAT_INC(total_calls);
    // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case)
    // Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
    // Phase 19-3a: Remove UNLIKELY hint (snapshot is ON by default in presets, hint is backwards)
    bool c7_ultra_free;
    if (hakmem_env_snapshot_enabled()) {
        const HakmemEnvSnapshot* env = hakmem_env_snapshot();
        c7_ultra_free = env->tiny_c7_ultra_enabled;
    } else {
        c7_ultra_free = tiny_c7_ultra_enabled_env();
    }
    if (class_idx == 7 && c7_ultra_free) {
        FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_c7_ultra);
        tiny_c7_ultra_free(ptr);
        FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
        return 1;
    }
    // Phase FREE-TINY-FAST-DUALHOT-1: C0-C3 direct path (48% of calls)
    // Skip expensive policy snapshot and route determination, direct to legacy fallback.
    // Safety: Check Larson mode (cross-thread free handling requires full validation path)
    {
        static __thread int g_larson_fix = -1;
        if (__builtin_expect(g_larson_fix == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
            g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(class_idx <= 3 && !g_larson_fix, 1)) {
            // C0-C3 + Larson mode OFF → Direct to legacy (no policy snapshot overhead)
            tiny_legacy_fallback_free_base(base, class_idx);
            FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
            return 1;
        }
    }
    // Phase POLICY-FAST-PATH-V2: Skip policy snapshot for known-legacy classes
    if (free_policy_fast_v2_can_skip((uint8_t)class_idx)) {
        FREE_PATH_STAT_INC(policy_fast_v2_skip);
        FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_policy_fast_skip);
        goto cold_path; // Delegate to cold path for legacy handling
    }
    // Phase v11b-1: Policy-based single switch (replaces serial ULTRA checks)
    const SmallPolicyV7* policy_free = small_policy_v7_snapshot();
    SmallRouteKind route_kind_free = policy_free->route_kind[class_idx];
    switch (route_kind_free) {
        case SMALL_ROUTE_ULTRA: {
            // Phase TLS-UNIFY-1: Unified ULTRA TLS push for C4-C6 (C7 handled above)
            if (class_idx >= 4 && class_idx <= 6) {
                FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_ultra_tls);
                tiny_ultra_tls_push((uint8_t)class_idx, base);
                FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
                return 1;
            }
            // ULTRA for other classes → fallback to cold path
            break;
        }
        case SMALL_ROUTE_MID_V35: {
            // Phase v11a-3: MID v3.5 free
            FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_mid_v35);
            small_mid_v35_free(ptr, class_idx);
            FREE_PATH_STAT_INC(smallheap_v7_fast);
            FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
            return 1;
        }
        case SMALL_ROUTE_V7: {
            // Phase v7: SmallObject v7 free (research box)
            if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
                FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_v7);
                FREE_PATH_STAT_INC(smallheap_v7_fast);
                FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
                return 1;
            }
            // V7 miss → fallback to cold path
            break;
        }
        case SMALL_ROUTE_MID_V3: {
            // Phase MID-V3: delegate to MID v3.5
            FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_mid_v35);
            small_mid_v35_free(ptr, class_idx);
            FREE_PATH_STAT_INC(smallheap_v7_fast);
            FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
            return 1;
        }
        case SMALL_ROUTE_LEGACY:
        default:
            break;
    }
cold_path:
    // Delegate to cold path for cross-thread, TinyHeap, and legacy handling
    return free_tiny_fast_cold(ptr, base, class_idx);
#else
    // No header mode - fall back to normal free
    return 0;
#endif
}
// ============================================================================
// Phase 26-B: free_tiny_fast() - Ultra-thin Tiny deallocation
// ============================================================================
// Single-layer Tiny deallocation (bypasses hak_free_at + wrapper + diagnostics)
// Preconditions:
// - ptr is from malloc_tiny_fast() (has valid header)
// - Front Gate Unified is enabled
// Returns:
// - 1 on success (pushed to Unified Cache)
// - 0 on failure (caller falls back to normal free path)
__attribute__((always_inline))
static inline int free_tiny_fast(void* ptr) {
if (__builtin_expect(!ptr, 0)) return 0;
#if HAKMEM_TINY_HEADER_CLASSIDX
// 1. ページ境界ガード:
// ptr がページ先頭 (offset==0) の場合、ptr-1 は別ページか未マップ領域になる可能性がある。
// その場合はヘッダ読みを行わず、通常 free 経路にフォールバックする。
uintptr_t off = (uintptr_t)ptr & 0xFFFu;
if (__builtin_expect(off == 0, 0)) {
return 0;
}
// 2. Fast header magic validation (必須)
// Release ビルドでは tiny_region_id_read_header() が magic を省略するため、
// ここで自前に Tiny 専用ヘッダ (0xA0) を検証しておく。
uint8_t* header_ptr = (uint8_t*)ptr - 1;
uint8_t header = *header_ptr;
uint8_t magic = header & 0xF0u;
if (__builtin_expect(magic != HEADER_MAGIC, 0)) {
// Tiny ヘッダではない → Mid/Large/外部ポインタなので通常 free 経路へ
return 0;
}
// 3. class_idx 抽出下位4bit
int class_idx = (int)(header & HEADER_CLASS_MASK);
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
return 0;
}
// 4. BASE を計算して Unified Cache に push
void* base = tiny_user_to_base_inline(ptr);
tiny_front_free_stat_inc(class_idx);
// Phase FREE-LEGACY-BREAKDOWN-1: カウンタ散布 (1. 関数入口)
FREE_PATH_STAT_INC(total_calls);
// Phase 9: MONO DUALHOT early-exit for C0-C3 (skip policy snapshot, direct to legacy)
// Conditions:
// - ENV: HAKMEM_FREE_TINY_FAST_MONO_DUALHOT=1
// - class_idx <= 3 (C0-C3)
// - !HAKMEM_TINY_LARSON_FIX (cross-thread handling requires full validation)
// - g_tiny_route_snapshot_done == 1 && route == TINY_ROUTE_LEGACY (断定できないときは既存経路)
if ((unsigned)class_idx <= 3u) {
if (free_tiny_fast_mono_dualhot_enabled()) {
static __thread int g_larson_fix = -1;
if (__builtin_expect(g_larson_fix == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
}
if (!g_larson_fix &&
g_tiny_route_snapshot_done == 1 &&
g_tiny_route_class[class_idx] == TINY_ROUTE_LEGACY) {
// Direct path: Skip policy snapshot, go straight to legacy fallback
FREE_PATH_STAT_INC(mono_dualhot_hit);
tiny_legacy_fallback_free_base(base, class_idx);
return 1;
}
}
}
// Phase 10: MONO LEGACY DIRECT early-exit for C4-C7 (skip policy snapshot, direct to legacy)
// Conditions:
// - ENV: HAKMEM_FREE_TINY_FAST_MONO_LEGACY_DIRECT=1
    // NOTE(review): this is the tail of the tiny free fast path; the function
    // header (and the #if this tail's #else/#endif at the bottom belong to) is
    // above this window. `ptr`, `base` and `class_idx` are established earlier.
    //
    // Fast-path preconditions for the mono-legacy direct route:
    // - cached nonlegacy_mask: class is NOT in non-legacy mask (= ULTRA/MID/V7 not active)
    // - g_tiny_route_snapshot_done == 1 && route == TINY_ROUTE_LEGACY (fall back to the existing path when this cannot be asserted)
    // - !HAKMEM_TINY_LARSON_FIX (cross-thread handling requires full validation)
    if (free_tiny_fast_mono_legacy_direct_enabled()) {
        // 1. Check nonlegacy mask (computed once at init)
        uint8_t nonlegacy_mask = free_tiny_fast_mono_legacy_direct_nonlegacy_mask();
        if ((nonlegacy_mask & (1u << class_idx)) == 0) {
            // 2. Check route snapshot
            if (g_tiny_route_snapshot_done == 1 && g_tiny_route_class[class_idx] == TINY_ROUTE_LEGACY) {
                // 3. Check Larson fix
                // Per-thread memoized read of HAKMEM_TINY_LARSON_FIX: -1 = not yet
                // read, 0 = off, 1 = on. getenv() runs at most once per thread.
                static __thread int g_larson_fix = -1;
                if (__builtin_expect(g_larson_fix == -1, 0)) {
                    const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
                    g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
                }
                if (!g_larson_fix) {
                    // Direct path: Skip policy snapshot, go straight to legacy fallback
                    FREE_PATH_STAT_INC(mono_legacy_direct_hit);
                    tiny_legacy_fallback_free_base(base, class_idx);
                    return 1;
                }
            }
        }
    }
    // Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case)
    // Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
    // Phase 19-3a: Remove UNLIKELY hint (snapshot is ON by default in presets, hint is backwards)
    bool c7_ultra_free;
    if (hakmem_env_snapshot_enabled()) {
        const HakmemEnvSnapshot* env = hakmem_env_snapshot();
        c7_ultra_free = env->tiny_c7_ultra_enabled;
    } else {
        c7_ultra_free = tiny_c7_ultra_enabled_env();
    }
    if (class_idx == 7 && c7_ultra_free) {
        tiny_c7_ultra_free(ptr);
        return 1;
    }
    // Phase POLICY-FAST-PATH-V2: Skip policy snapshot for known-legacy classes
    if (free_policy_fast_v2_can_skip((uint8_t)class_idx)) {
        FREE_PATH_STAT_INC(policy_fast_v2_skip);
        goto legacy_fallback;
    }
    // Phase v11b-1: Policy-based single switch (replaces serial ULTRA checks)
    const SmallPolicyV7* policy_free = small_policy_v7_snapshot();
    SmallRouteKind route_kind_free = policy_free->route_kind[class_idx];
    switch (route_kind_free) {
    case SMALL_ROUTE_ULTRA: {
        // Phase TLS-UNIFY-1: Unified ULTRA TLS push for C4-C6 (C7 handled above)
        if (class_idx >= 4 && class_idx <= 6) {
            tiny_ultra_tls_push((uint8_t)class_idx, base);
            return 1;
        }
        // ULTRA for other classes → fallback to LEGACY
        break;
    }
    case SMALL_ROUTE_MID_V35: {
        // Phase v11a-3: MID v3.5 free
        small_mid_v35_free(ptr, class_idx);
        FREE_PATH_STAT_INC(smallheap_v7_fast);
        return 1;
    }
    case SMALL_ROUTE_V7: {
        // Phase v7: SmallObject v7 free (research box)
        if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
            FREE_PATH_STAT_INC(smallheap_v7_fast);
            return 1;
        }
        // V7 miss → fallback to LEGACY
        break;
    }
    case SMALL_ROUTE_MID_V3: {
        // Phase MID-V3: delegate to MID v3.5
        small_mid_v35_free(ptr, class_idx);
        FREE_PATH_STAT_INC(smallheap_v7_fast);
        return 1;
    }
    case SMALL_ROUTE_LEGACY:
    default:
        break;
    }
legacy_fallback:
    // LEGACY fallback path
    // Phase 3 D1: Free path route cache (eliminate tiny_route_for_class overhead)
    tiny_route_kind_t route;
    if (__builtin_expect(tiny_free_static_route_enabled(), 0)) {
        // Use cached route (bypasses tiny_route_for_class())
        // `& 7u` clamps the index to the 8-entry route table.
        route = g_tiny_route_class[(unsigned)class_idx & 7u];
        if (__builtin_expect(route == TINY_ROUTE_LEGACY && !g_tiny_route_snapshot_done, 0)) {
            // Fallback if uninitialized
            route = tiny_route_for_class((uint8_t)class_idx);
        }
    } else {
        // Standard path
        route = tiny_route_for_class((uint8_t)class_idx);
    }
    const int use_tiny_heap = tiny_route_is_heap_kind(route);
    // Phase 4 E1: Use ENV snapshot when enabled (consolidates 3 TLS reads → 1)
    // Phase 19-3a: Remove UNLIKELY hint (snapshot is ON by default in presets, hint is backwards)
    // NOTE(review): front_snap is computed here but not consumed in this visible
    // tail — presumably used by code above or kept for side effects; confirm.
    const TinyFrontV3Snapshot* front_snap;
    if (hakmem_env_snapshot_enabled()) {
        const HakmemEnvSnapshot* env = hakmem_env_snapshot();
        front_snap = env->tiny_front_v3_enabled ? tiny_front_v3_snapshot_get() : NULL;
    } else {
        front_snap = __builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
    }
    // TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
    // In Release builds, we trust header magic (0xA0) as sufficient validation.
#if !HAKMEM_BUILD_RELEASE
    // 5. Verify SuperSlab registration (guards against misclassification)
    SuperSlab* ss_guard = hak_super_lookup(ptr);
    if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) {
        return 0; // not managed by hakmem → defer to the normal free path
    }
#endif // !HAKMEM_BUILD_RELEASE
    // Cross-thread free detection (Larson MT crash fix, ENV gated) + TinyHeap free path
    {
        // Second per-thread memoization of HAKMEM_TINY_LARSON_FIX (independent
        // of the one in the mono-legacy block above; same semantics).
        static __thread int g_larson_fix = -1;
        if (__builtin_expect(g_larson_fix == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
            g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
#if !HAKMEM_BUILD_RELEASE
            fprintf(stderr, "[LARSON_FIX_INIT] g_larson_fix=%d (env=%s)\n", g_larson_fix, e ? e : "NULL");
            fflush(stderr);
#endif
        }
        if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
            // Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
            SuperSlab* ss = ss_fast_lookup(base);
            // Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (5. super_lookup call)
            FREE_PATH_STAT_INC(super_lookup_called);
            if (ss) {
                int slab_idx = slab_index_for(ss, base);
                if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
                    uint32_t self_tid = tiny_self_u32_local();
                    uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
                    TinySlabMeta* meta = &ss->slabs[slab_idx];
                    // LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
                    uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
#if !HAKMEM_BUILD_RELEASE
                    // Debug-only: log the first 10 owner checks across all threads.
                    static _Atomic uint64_t g_owner_check_count = 0;
                    uint64_t oc = atomic_fetch_add(&g_owner_check_count, 1);
                    if (oc < 10) {
                        fprintf(stderr, "[LARSON_FIX] Owner check: ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x match=%d\n",
                                ptr, owner_tid_low, self_tid_cmp, self_tid, (owner_tid_low == self_tid_cmp));
                        fflush(stderr);
                    }
#endif
                    if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
                        // Cross-thread free → route to remote queue instead of poisoning TLS cache
#if !HAKMEM_BUILD_RELEASE
                        // Debug-only: log the first 20 cross-thread detections.
                        static _Atomic uint64_t g_cross_thread_count = 0;
                        uint64_t ct = atomic_fetch_add(&g_cross_thread_count, 1);
                        if (ct < 20) {
                            fprintf(stderr, "[LARSON_FIX] Cross-thread free detected! ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x\n",
                                    ptr, owner_tid_low, self_tid_cmp, self_tid);
                            fflush(stderr);
                        }
#endif
                        if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
                            // Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (6. cross-thread free)
                            FREE_PATH_STAT_INC(remote_free);
                            return 1; // handled via remote queue
                        }
                        return 0; // remote push failed; fall back to normal path
                    }
                    // Same-thread + TinyHeap route → route-based free
                    if (__builtin_expect(use_tiny_heap, 0)) {
                        switch (route) {
                        case TINY_ROUTE_SMALL_HEAP_V7: {
                            // Phase v7-1: C6-only v7 stub (MID v3 fallback)
                            if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
                                return 1;
                            }
                            break; // fallthrough to legacy
                        }
                        case TINY_ROUTE_SMALL_HEAP_V6: {
                            // Phase V6-HDR-2: Headerless free (ENV gated)
                            if (small_v6_headerless_route_enabled((uint8_t)class_idx)) {
                                SmallHeapCtxV6* ctx_v6 = small_heap_ctx_v6();
                                if (small_v6_headerless_free(ctx_v6, ptr, (uint8_t)class_idx)) {
                                    return 1; // Handled by v6
                                }
                                // v6 returned false -> fallback to legacy
                            }
                            break; // fallthrough to legacy
                        }
                        // Phase v10: v3/v4/v5 removed - routes now handled as LEGACY
                        case TINY_ROUTE_HOTHEAP_V2:
                            tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
                            // Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (v2 is counted as tiny_heap_v1)
                            FREE_PATH_STAT_INC(tiny_heap_v1_fast);
                            return 1;
                        case TINY_ROUTE_HEAP: {
                            tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
                            // Class 7 has a dedicated fast free; others go through
                            // the generic per-class path with slab metadata.
                            if (class_idx == 7) {
                                tiny_c7_free_fast_with_meta(ss, slab_idx, base);
                            } else {
                                tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
                            }
                            // Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (9. TinyHeap v1 route)
                            FREE_PATH_STAT_INC(tiny_heap_v1_fast);
                            return 1;
                        }
                        default:
                            break;
                        }
                    }
                }
            }
            if (use_tiny_heap) {
                // fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
                if (route == TINY_ROUTE_HOTHEAP_V2) {
                    tiny_hotheap_v2_record_free_fallback((uint8_t)class_idx);
                }
                // Phase v10: v3/v4 removed - no special fallback
                tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
                return 1;
            }
        }
    }
    // Debug: Log free operations (first 5000, all classes)
#if !HAKMEM_BUILD_RELEASE
    {
        extern _Atomic uint64_t g_debug_op_count;
        extern __thread TinyTLSSLL g_tls_sll[];
        uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
        // Note: Shares g_debug_op_count with alloc logging, so bump the window.
        if (op < 5000) {
            fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p from=free_tiny_fast tls_count_before=%u\n",
                    (unsigned long)op, class_idx, ptr, base,
                    g_tls_sll[class_idx].count);
            fflush(stderr);
        }
    }
#endif
    // Phase REFACTOR-2: Legacy fallback (use unified helper)
    tiny_legacy_fallback_free_base(base, class_idx);
    return 1;
#else
    // No header mode - fall back to normal free
    // (This #else/#endif pair closes a #if that begins above this window.)
    return 0;
#endif
}
#endif // HAK_FRONT_MALLOC_TINY_FAST_H