Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split for free_tiny_fast [RESEARCH BOX - FREEZE]
Split free_tiny_fast() into hot and cold paths to reduce I-cache pressure:

- free_tiny_fast_hot(): always_inline, fast-path validation + ULTRA/MID/V7
- free_tiny_fast_cold(): noinline,cold, cross-thread + TinyHeap + legacy

ENV: HAKMEM_FREE_TINY_FAST_HOTCOLD=0/1 (default 0)
Stats: HAKMEM_FREE_TINY_FAST_HOTCOLD_STATS=0/1 (TLS only, exit dump)

## Benchmark Results (random mixed, 100M ops)

HOTCOLD=0 (legacy): 49.35M, 50.18M, 50.25M ops/s (median: 50.18M)
HOTCOLD=1 (split): 43.54M, 43.59M, 43.62M ops/s (median: 43.59M)

**Regression: -13.1%** (NO-GO)

## Stats Analysis (10M ops, HOTCOLD_STATS=1)

Hot path: 50.11% (C7 ULTRA early-exit)
Cold path: 48.43% (legacy fallback)

## Root Cause

Design assumption FAILED: "Cold path is rare"
Reality: Cold path is 48% (almost as common as hot path)

The split introduces:

1. Extra dispatch overhead in hot path
2. Function call overhead to cold for ~48% of frees
3. "Cold" is NOT rare - it's the legacy fallback for non-ULTRA classes

## Conclusion

**FREEZE as research box (default OFF)**

Box Theory value:

- Validated hot/cold distribution via TLS stats
- Confirmed that legacy fallback is NOT rare (48%)
- Demonstrated that naive hot/cold split hurts when "cold" is common

Alternative approaches for future work:

1. Inline the legacy fallback in hot path (no split)
2. Route-specific specialization (C7 vs non-C7 separate paths)
3. Policy-based early routing (before header validation)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
31
core/box/free_tiny_fast_hotcold_env_box.h
Normal file
31
core/box/free_tiny_fast_hotcold_env_box.h
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
// free_tiny_fast_hotcold_env_box.h
|
||||||
|
// Phase FREE-TINY-FAST-HOTCOLD-OPT-1: ENV control for hot/cold split
|
||||||
|
//
|
||||||
|
// Design:
|
||||||
|
// - ENV: HAKMEM_FREE_TINY_FAST_HOTCOLD=0/1 (default 0)
|
||||||
|
// - Lazy init, cached static
|
||||||
|
// - Used to A/B test hot/cold split vs legacy monolithic function
|
||||||
|
|
||||||
|
#ifndef HAK_FREE_TINY_FAST_HOTCOLD_ENV_BOX_H
|
||||||
|
#define HAK_FREE_TINY_FAST_HOTCOLD_ENV_BOX_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "../hakmem_build_flags.h"
|
||||||
|
|
||||||
|
// Reports whether the hot/cold split of free_tiny_fast() is switched on.
//
// The answer comes from the HAKMEM_FREE_TINY_FAST_HOTCOLD environment
// variable and is "on" only when the variable's first character is '1'.
// The variable is read once, on the first call; every later call returns
// the cached result.
//
// Returns: 1 when the split is enabled, 0 otherwise.
//
// Side effect: when HAKMEM_BUILD_RELEASE is zero and the split is enabled,
// a one-time notice is written to stderr during the first call.
static inline int hak_free_tiny_fast_hotcold_enabled(void) {
    static int cached_flag = -1;  // -1 = environment not consulted yet

    // Usual case: the flag was already resolved by an earlier call.
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }

    const char* env_value = getenv("HAKMEM_FREE_TINY_FAST_HOTCOLD");
    if (env_value != NULL && env_value[0] == '1') {
        cached_flag = 1;
    } else {
        cached_flag = 0;
    }

#if !HAKMEM_BUILD_RELEASE
    if (cached_flag != 0) {
        fprintf(stderr, "[FREE_TINY_FAST_HOTCOLD] Enabled (hot/cold split)\n");
        fflush(stderr);
    }
#endif

    return cached_flag;
}
|
||||||
|
|
||||||
|
#endif // HAK_FREE_TINY_FAST_HOTCOLD_ENV_BOX_H
|
||||||
130
core/box/free_tiny_fast_hotcold_stats_box.h
Normal file
130
core/box/free_tiny_fast_hotcold_stats_box.h
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
// free_tiny_fast_hotcold_stats_box.h
|
||||||
|
// Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split stats (TLS only)
|
||||||
|
//
|
||||||
|
// Design:
|
||||||
|
// - TLS counters only (NO global atomic to avoid contention)
|
||||||
|
// - Exit dump via destructor
|
||||||
|
// - ENV: HAKMEM_FREE_TINY_FAST_HOTCOLD_STATS=0/1 (default 0)
|
||||||
|
//
|
||||||
|
// Stats tracked:
|
||||||
|
// - hot_hit: Hot path completed successfully
|
||||||
|
// - hot_c7_ultra: C7 ULTRA early-exit
|
||||||
|
// - hot_ultra_tls: C4-C6 ULTRA TLS push
|
||||||
|
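// - hot_mid_v35: MID v3.5 free (also used for the MID v3 route)
// - hot_v7: SmallObject v7 free succeeded
// - hot_policy_fast_skip: Policy snapshot skipped (free handed to the cold path)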
|
||||||
|
// - cold_hit: Cold path called
|
||||||
|
// - cold_cross_thread: Cross-thread free (Larson fix)
|
||||||
|
// - cold_tinyheap: TinyHeap route
|
||||||
|
// - cold_legacy_fallback: Legacy fallback
|
||||||
|
// - ret0_null_ptr: NULL pointer check
|
||||||
|
// - ret0_page_boundary: Page boundary guard
|
||||||
|
// - ret0_bad_magic: Invalid header magic
|
||||||
|
// - ret0_bad_class: Invalid class_idx
|
||||||
|
|
||||||
|
#ifndef HAK_FREE_TINY_FAST_HOTCOLD_STATS_BOX_H
|
||||||
|
#define HAK_FREE_TINY_FAST_HOTCOLD_STATS_BOX_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "../hakmem_build_flags.h"
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// ENV Control
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Tells whether hot/cold statistics collection is switched on.
//
// Controlled by the HAKMEM_FREE_TINY_FAST_HOTCOLD_STATS environment
// variable: collection is on only when the variable's first character is
// '1'.  The lookup happens on the first call and the result is cached for
// all later calls.
//
// Returns: 1 when stats are enabled, 0 otherwise.
//
// Side effect: when HAKMEM_BUILD_RELEASE is zero and stats are enabled, a
// one-time notice is written to stderr during the first call.
static inline int free_tiny_fast_hotcold_stats_enabled(void) {
    static int stats_flag = -1;  // -1 = not resolved yet

    // Already resolved: nothing more to do.
    if (__builtin_expect(stats_flag != -1, 1)) {
        return stats_flag;
    }

    const char* setting = getenv("HAKMEM_FREE_TINY_FAST_HOTCOLD_STATS");
    int enabled = 0;
    if (setting != NULL && setting[0] == '1') {
        enabled = 1;
    }
    stats_flag = enabled;

#if !HAKMEM_BUILD_RELEASE
    if (enabled) {
        fprintf(stderr, "[FREE_TINY_FAST_HOTCOLD_STATS] Enabled\n");
        fflush(stderr);
    }
#endif

    return stats_flag;
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// TLS Stats Counters
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Counter set for the hot/cold split of free_tiny_fast().
// Each field is bumped through FREE_TINY_FAST_HOTCOLD_STAT_INC(field), so the
// field names double as the macro's argument values.
typedef struct {
|
||||||
|
uint64_t hot_hit; // free completed inside free_tiny_fast_hot() (bumped together with one route counter below)
|
||||||
|
uint64_t hot_c7_ultra; // class 7 freed through tiny_c7_ultra_free()
|
||||||
|
uint64_t hot_ultra_tls; // classes 4-6 pushed through tiny_ultra_tls_push()
|
||||||
|
uint64_t hot_mid_v35; // freed through small_mid_v35_free() (MID_V35 and MID_V3 routes)
|
||||||
|
uint64_t hot_v7; // small_heap_free_fast_v7_stub() accepted the free on the V7 route
|
||||||
|
uint64_t hot_policy_fast_skip; // free_policy_fast_v2_can_skip() was true; free handed to the cold path
|
||||||
|
uint64_t cold_hit; // entries into free_tiny_fast_cold()
|
||||||
|
uint64_t cold_cross_thread; // owner/self thread-id mismatch detected in the cold path
|
||||||
|
uint64_t cold_tinyheap; // same-thread free on a TinyHeap route
|
||||||
|
uint64_t cold_legacy_fallback; // cold path reached tiny_legacy_fallback_free_base()
|
||||||
|
uint64_t ret0_null_ptr; // hot path returned 0: NULL pointer
|
||||||
|
uint64_t ret0_page_boundary; // hot path returned 0: pointer at page offset 0
|
||||||
|
uint64_t ret0_bad_magic; // hot path returned 0: header magic mismatch
|
||||||
|
uint64_t ret0_bad_class; // hot path returned 0: class index out of range
|
||||||
|
} FreeTinyFastHotColdStats;
|
||||||
|
|
||||||
|
// Thread-local instance, zero-initialized.
// NOTE(review): declared `static` in a header, so each translation unit that
// includes this file presumably gets its own per-thread copy — confirm that
// all increments and the exit dump live in the same translation unit.
static __thread FreeTinyFastHotColdStats t_free_tiny_fast_hotcold_stats = {0};
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Stats Increment Macros
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Increments t_free_tiny_fast_hotcold_stats.<field> for the calling thread,
// but only when free_tiny_fast_hotcold_stats_enabled() returns non-zero.
// The enabled check is hinted as unlikely via __builtin_expect.
// `field` must be a member name of FreeTinyFastHotColdStats.
// Wrapped in do { ... } while (0) so it behaves as a single statement.
#define FREE_TINY_FAST_HOTCOLD_STAT_INC(field) \
|
||||||
|
do { \
|
||||||
|
if (__builtin_expect(free_tiny_fast_hotcold_stats_enabled(), 0)) { \
|
||||||
|
t_free_tiny_fast_hotcold_stats.field++; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Stats Dump (destructor)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
__attribute__((destructor))
|
||||||
|
static void free_tiny_fast_hotcold_stats_dump(void) {
|
||||||
|
if (!free_tiny_fast_hotcold_stats_enabled()) return;
|
||||||
|
|
||||||
|
FreeTinyFastHotColdStats* s = &t_free_tiny_fast_hotcold_stats;
|
||||||
|
|
||||||
|
// Only dump if this thread did any work
|
||||||
|
uint64_t total = s->hot_hit + s->cold_hit + s->ret0_null_ptr +
|
||||||
|
s->ret0_page_boundary + s->ret0_bad_magic + s->ret0_bad_class;
|
||||||
|
if (total == 0) return;
|
||||||
|
|
||||||
|
fprintf(stderr, "\n[FREE_TINY_FAST_HOTCOLD_STATS] Thread summary:\n");
|
||||||
|
fprintf(stderr, " Hot path:\n");
|
||||||
|
fprintf(stderr, " hot_hit = %12llu\n", (unsigned long long)s->hot_hit);
|
||||||
|
fprintf(stderr, " hot_c7_ultra = %12llu\n", (unsigned long long)s->hot_c7_ultra);
|
||||||
|
fprintf(stderr, " hot_ultra_tls = %12llu\n", (unsigned long long)s->hot_ultra_tls);
|
||||||
|
fprintf(stderr, " hot_mid_v35 = %12llu\n", (unsigned long long)s->hot_mid_v35);
|
||||||
|
fprintf(stderr, " hot_v7 = %12llu\n", (unsigned long long)s->hot_v7);
|
||||||
|
fprintf(stderr, " hot_policy_fast_skip= %12llu\n", (unsigned long long)s->hot_policy_fast_skip);
|
||||||
|
fprintf(stderr, " Cold path:\n");
|
||||||
|
fprintf(stderr, " cold_hit = %12llu\n", (unsigned long long)s->cold_hit);
|
||||||
|
fprintf(stderr, " cold_cross_thread = %12llu\n", (unsigned long long)s->cold_cross_thread);
|
||||||
|
fprintf(stderr, " cold_tinyheap = %12llu\n", (unsigned long long)s->cold_tinyheap);
|
||||||
|
fprintf(stderr, " cold_legacy_fallback= %12llu\n", (unsigned long long)s->cold_legacy_fallback);
|
||||||
|
fprintf(stderr, " Early returns (ret0):\n");
|
||||||
|
fprintf(stderr, " ret0_null_ptr = %12llu\n", (unsigned long long)s->ret0_null_ptr);
|
||||||
|
fprintf(stderr, " ret0_page_boundary = %12llu\n", (unsigned long long)s->ret0_page_boundary);
|
||||||
|
fprintf(stderr, " ret0_bad_magic = %12llu\n", (unsigned long long)s->ret0_bad_magic);
|
||||||
|
fprintf(stderr, " ret0_bad_class = %12llu\n", (unsigned long long)s->ret0_bad_class);
|
||||||
|
fprintf(stderr, " Total calls = %12llu\n", (unsigned long long)total);
|
||||||
|
|
||||||
|
// Hot/Cold ratio
|
||||||
|
if (total > 0) {
|
||||||
|
double hot_pct = (s->hot_hit * 100.0) / total;
|
||||||
|
double cold_pct = (s->cold_hit * 100.0) / total;
|
||||||
|
fprintf(stderr, " Hot ratio = %10.2f%%\n", hot_pct);
|
||||||
|
fprintf(stderr, " Cold ratio = %10.2f%%\n", cold_pct);
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // HAK_FREE_TINY_FAST_HOTCOLD_STATS_BOX_H
|
||||||
@ -270,7 +270,13 @@ void free(void* ptr) {
|
|||||||
// Phase 4-Step3: Use config macro for compile-time optimization
|
// Phase 4-Step3: Use config macro for compile-time optimization
|
||||||
// Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
|
// Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
|
||||||
if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
|
if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
|
||||||
int freed = free_tiny_fast(ptr);
|
// Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split dispatch
|
||||||
|
int freed;
|
||||||
|
if (__builtin_expect(hak_free_tiny_fast_hotcold_enabled(), 0)) {
|
||||||
|
freed = free_tiny_fast_hot(ptr); // NEW: Hot/Cold split version
|
||||||
|
} else {
|
||||||
|
freed = free_tiny_fast(ptr); // OLD: Legacy monolithic version
|
||||||
|
}
|
||||||
if (__builtin_expect(freed, 1)) {
|
if (__builtin_expect(freed, 1)) {
|
||||||
return; // Success (pushed to Unified Cache)
|
return; // Success (pushed to Unified Cache)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -63,6 +63,8 @@
|
|||||||
#include "../box/free_path_stats_box.h" // Phase FREE-LEGACY-BREAKDOWN-1: Free path stats
|
#include "../box/free_path_stats_box.h" // Phase FREE-LEGACY-BREAKDOWN-1: Free path stats
|
||||||
#include "../box/alloc_gate_stats_box.h" // Phase ALLOC-GATE-OPT-1: Alloc gate stats
|
#include "../box/alloc_gate_stats_box.h" // Phase ALLOC-GATE-OPT-1: Alloc gate stats
|
||||||
#include "../box/free_policy_fast_v2_box.h" // Phase POLICY-FAST-PATH-V2: Policy snapshot bypass
|
#include "../box/free_policy_fast_v2_box.h" // Phase POLICY-FAST-PATH-V2: Policy snapshot bypass
|
||||||
|
#include "../box/free_tiny_fast_hotcold_env_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: ENV control
|
||||||
|
#include "../box/free_tiny_fast_hotcold_stats_box.h" // Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Stats
|
||||||
|
|
||||||
// Helper: current thread id (low 32 bits) for owner check
|
// Helper: current thread id (low 32 bits) for owner check
|
||||||
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
||||||
@ -216,6 +218,287 @@ static inline void* malloc_tiny_fast(size_t size) {
|
|||||||
return tiny_cold_refill_and_alloc(class_idx);
|
return tiny_cold_refill_and_alloc(class_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Phase FREE-TINY-FAST-HOTCOLD-OPT-1: Hot/Cold split helpers
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Cold path: Cross-thread free, TinyHeap routes, and legacy fallback
|
||||||
|
// (noinline,cold to keep hot path small and I-cache clean)
|
||||||
|
// free_tiny_fast_cold(): out-of-line half of the hot/cold split.
// free_tiny_fast_hot() tail-calls this for every free it does not finish.
//
// Parameters:
//   ptr       - user pointer passed to free()
//   base      - block base; the hot path computes it with tiny_user_to_base_inline(ptr)
//   class_idx - size class; the hot path decodes it from the header byte
//
// Returns 1 when the free was handled here.  Returns 0 when the caller must
// use the normal free path: either the debug-build SuperSlab guard rejected
// the pointer, or a cross-thread free could not be pushed to the remote queue.
//
// Order of work:
//   1. (non-release builds only) SuperSlab registration guard
//   2. cross-thread detection and TinyHeap routes, entered only when
//      HAKMEM_TINY_LARSON_FIX is set or the class routes to a TinyHeap kind
//   3. legacy fallback via tiny_legacy_fallback_free_base()
__attribute__((noinline,cold))
|
||||||
|
static int free_tiny_fast_cold(void* ptr, void* base, int class_idx)
|
||||||
|
{
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_hit);
|
||||||
|
|
||||||
|
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
|
||||||
|
const int use_tiny_heap = tiny_route_is_heap_kind(route);
|
||||||
|
// NOTE(review): front_snap is never read anywhere in this function; it looks
// like a leftover from the monolithic free_tiny_fast() — confirm whether the
// calls are needed for their side effects before removing.
const TinyFrontV3Snapshot* front_snap =
|
||||||
|
__builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
|
||||||
|
|
||||||
|
// TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
|
||||||
|
// In Release builds, we trust header magic (0xA0) as sufficient validation.
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
// Confirm the pointer belongs to a registered SuperSlab (guards against misclassification)
|
||||||
|
SuperSlab* ss_guard = hak_super_lookup(ptr);
|
||||||
|
if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) {
|
||||||
|
return 0; // not managed by hakmem -> take the normal free path
|
||||||
|
}
|
||||||
|
#endif // !HAKMEM_BUILD_RELEASE
|
||||||
|
|
||||||
|
// Cross-thread free detection (Larson MT crash fix, ENV gated) + TinyHeap free path
|
||||||
|
{
|
||||||
|
// Per-thread cache of HAKMEM_TINY_LARSON_FIX: -1 = not read yet; any
// non-empty value that does not start with '0' enables the fix.
static __thread int g_larson_fix = -1;
|
||||||
|
if (__builtin_expect(g_larson_fix == -1, 0)) {
|
||||||
|
const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
|
||||||
|
g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
fprintf(stderr, "[LARSON_FIX_INIT] g_larson_fix=%d (env=%s)\n", g_larson_fix, e ? e : "NULL");
|
||||||
|
fflush(stderr);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
|
||||||
|
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
||||||
|
SuperSlab* ss = ss_fast_lookup(base);
|
||||||
|
// Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (5. super_lookup call)
|
||||||
|
FREE_PATH_STAT_INC(super_lookup_called);
|
||||||
|
if (ss) {
|
||||||
|
int slab_idx = slab_index_for(ss, base);
|
||||||
|
if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
|
||||||
|
uint32_t self_tid = tiny_self_u32_local();
|
||||||
|
uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
|
||||||
|
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||||
|
// LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
|
||||||
|
uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
// Debug trace: log only the first 10 owner checks (process-wide counter).
static _Atomic uint64_t g_owner_check_count = 0;
|
||||||
|
uint64_t oc = atomic_fetch_add(&g_owner_check_count, 1);
|
||||||
|
if (oc < 10) {
|
||||||
|
fprintf(stderr, "[LARSON_FIX] Owner check: ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x match=%d\n",
|
||||||
|
ptr, owner_tid_low, self_tid_cmp, self_tid, (owner_tid_low == self_tid_cmp));
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
|
||||||
|
// Cross-thread free → route to remote queue instead of poisoning TLS cache
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_cross_thread);
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
// Debug trace: log only the first 20 cross-thread frees (process-wide counter).
static _Atomic uint64_t g_cross_thread_count = 0;
|
||||||
|
uint64_t ct = atomic_fetch_add(&g_cross_thread_count, 1);
|
||||||
|
if (ct < 20) {
|
||||||
|
fprintf(stderr, "[LARSON_FIX] Cross-thread free detected! ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x\n",
|
||||||
|
ptr, owner_tid_low, self_tid_cmp, self_tid);
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
|
||||||
|
// Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (6. cross-thread free)
|
||||||
|
FREE_PATH_STAT_INC(remote_free);
|
||||||
|
return 1; // handled via remote queue
|
||||||
|
}
|
||||||
|
return 0; // remote push failed; fall back to normal path
|
||||||
|
}
|
||||||
|
// Same-thread + TinyHeap route → route-based free
|
||||||
|
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_tinyheap);
|
||||||
|
// NOTE(review): every `break` out of this switch lands on the
// `if (use_tiny_heap)` generic TinyHeap fallback further down (the flag is
// true here), not on the legacy fallback at the end of the function as the
// "fallthrough to legacy" comments suggest — confirm which is intended.
switch (route) {
|
||||||
|
case TINY_ROUTE_SMALL_HEAP_V7: {
|
||||||
|
// Phase v7-1: C6-only v7 stub (MID v3 fallback)
|
||||||
|
if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
break; // fallthrough to legacy
|
||||||
|
}
|
||||||
|
case TINY_ROUTE_SMALL_HEAP_V6: {
|
||||||
|
// Phase V6-HDR-2: Headerless free (ENV gated)
|
||||||
|
if (small_v6_headerless_route_enabled((uint8_t)class_idx)) {
|
||||||
|
SmallHeapCtxV6* ctx_v6 = small_heap_ctx_v6();
|
||||||
|
if (small_v6_headerless_free(ctx_v6, ptr, (uint8_t)class_idx)) {
|
||||||
|
return 1; // Handled by v6
|
||||||
|
}
|
||||||
|
// v6 returned false -> fallback to legacy
|
||||||
|
}
|
||||||
|
break; // fallthrough to legacy
|
||||||
|
}
|
||||||
|
// Phase v10: v3/v4/v5 removed - routes now handled as LEGACY
|
||||||
|
case TINY_ROUTE_HOTHEAP_V2:
|
||||||
|
tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
|
||||||
|
// Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (v2 is counted under tiny_heap_v1)
|
||||||
|
FREE_PATH_STAT_INC(tiny_heap_v1_fast);
|
||||||
|
return 1;
|
||||||
|
case TINY_ROUTE_HEAP: {
|
||||||
|
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||||
|
if (class_idx == 7) {
|
||||||
|
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
|
||||||
|
} else {
|
||||||
|
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
|
||||||
|
}
|
||||||
|
// Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (9. TinyHeap v1 route)
|
||||||
|
FREE_PATH_STAT_INC(tiny_heap_v1_fast);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (use_tiny_heap) {
|
||||||
|
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
|
||||||
|
if (route == TINY_ROUTE_HOTHEAP_V2) {
|
||||||
|
tiny_hotheap_v2_record_free_fallback((uint8_t)class_idx);
|
||||||
|
}
|
||||||
|
// Phase v10: v3/v4 removed - no special fallback
|
||||||
|
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Debug: Log free operations (first 5000, all classes)
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
{
|
||||||
|
extern _Atomic uint64_t g_debug_op_count;
|
||||||
|
extern __thread TinyTLSSLL g_tls_sll[];
|
||||||
|
uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
|
||||||
|
// Note: Shares g_debug_op_count with alloc logging, so bump the window.
|
||||||
|
if (op < 5000) {
|
||||||
|
fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p from=free_tiny_fast_cold tls_count_before=%u\n",
|
||||||
|
(unsigned long)op, class_idx, ptr, base,
|
||||||
|
g_tls_sll[class_idx].count);
|
||||||
|
fflush(stderr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Phase REFACTOR-2: Legacy fallback (use unified helper)
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(cold_legacy_fallback);
|
||||||
|
tiny_legacy_fallback_free_base(base, class_idx);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hot path: Fast-path validation + ULTRA/MID/V7 routes
|
||||||
|
// (always_inline to minimize overhead on critical path)
|
||||||
|
// free_tiny_fast_hot(): inlined half of the hot/cold split.
//
// Validates the pointer and its one-byte header, then tries the routes that
// are handled inline (C7 ULTRA, ULTRA TLS push for classes 4-6, MID v3.5,
// V7).  Anything else is passed to free_tiny_fast_cold().
//
// Parameter:
//   ptr - user pointer passed to free(); NULL is accepted and yields 0
//
// Returns 1 when the free was handled (here or in the cold path), 0 when the
// caller must use the normal free path (NULL, page-start pointer, header
// magic mismatch, class index out of range, or the cold path returned 0).
// When HAKMEM_TINY_HEADER_CLASSIDX is zero, every non-NULL pointer yields 0.
__attribute__((always_inline))
|
||||||
|
static inline int free_tiny_fast_hot(void* ptr) {
|
||||||
|
if (__builtin_expect(!ptr, 0)) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_null_ptr);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||||
|
// 1. Page-boundary guard:
|
||||||
|
// If ptr sits at the start of a page (offset == 0), ptr-1 may lie in a different page or in unmapped memory.
|
||||||
|
// In that case skip the header read and fall back to the normal free path.
|
||||||
|
// NOTE(review): the 0xFFF mask presumably assumes 4 KiB pages — confirm for other page sizes.
uintptr_t off = (uintptr_t)ptr & 0xFFFu;
|
||||||
|
if (__builtin_expect(off == 0, 0)) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_page_boundary);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Fast header magic validation (required)
|
||||||
|
// In Release builds tiny_region_id_read_header() skips the magic check,
|
||||||
|
// so validate the Tiny-specific header (0xA0) here ourselves.
|
||||||
|
uint8_t* header_ptr = (uint8_t*)ptr - 1;
|
||||||
|
uint8_t header = *header_ptr;
|
||||||
|
uint8_t magic = header & 0xF0u;
|
||||||
|
if (__builtin_expect(magic != HEADER_MAGIC, 0)) {
|
||||||
|
// Not a Tiny header -> Mid/Large/foreign pointer, so take the normal free path
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_bad_magic);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Extract class_idx (low 4 bits)
|
||||||
|
int class_idx = (int)(header & HEADER_CLASS_MASK);
|
||||||
|
// NOTE(review): class_idx is derived from masking a uint8_t, so the
// `class_idx < 0` half of this test can never be true; only the upper bound matters.
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(ret0_bad_class);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Compute BASE and push to the Unified Cache
|
||||||
|
void* base = tiny_user_to_base_inline(ptr);
|
||||||
|
tiny_front_free_stat_inc(class_idx);
|
||||||
|
|
||||||
|
// Phase FREE-LEGACY-BREAKDOWN-1: counter instrumentation (1. function entry)
|
||||||
|
FREE_PATH_STAT_INC(total_calls);
|
||||||
|
|
||||||
|
// Phase v11b-1: C7 ULTRA early-exit (skip policy snapshot for most common case)
|
||||||
|
if (class_idx == 7 && tiny_c7_ultra_enabled_env()) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_c7_ultra);
|
||||||
|
tiny_c7_ultra_free(ptr);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase POLICY-FAST-PATH-V2: Skip policy snapshot for known-legacy classes
|
||||||
|
if (free_policy_fast_v2_can_skip((uint8_t)class_idx)) {
|
||||||
|
FREE_PATH_STAT_INC(policy_fast_v2_skip);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_policy_fast_skip);
|
||||||
|
goto cold_path; // Delegate to cold path for legacy handling
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase v11b-1: Policy-based single switch (replaces serial ULTRA checks)
|
||||||
|
const SmallPolicyV7* policy_free = small_policy_v7_snapshot();
|
||||||
|
SmallRouteKind route_kind_free = policy_free->route_kind[class_idx];
|
||||||
|
|
||||||
|
// Every `break` below leaves the switch and reaches the cold_path label.
switch (route_kind_free) {
|
||||||
|
case SMALL_ROUTE_ULTRA: {
|
||||||
|
// Phase TLS-UNIFY-1: Unified ULTRA TLS push for C4-C6 (C7 handled above)
|
||||||
|
if (class_idx >= 4 && class_idx <= 6) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_ultra_tls);
|
||||||
|
tiny_ultra_tls_push((uint8_t)class_idx, base);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// ULTRA for other classes → fallback to cold path
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SMALL_ROUTE_MID_V35: {
|
||||||
|
// Phase v11a-3: MID v3.5 free
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_mid_v35);
|
||||||
|
small_mid_v35_free(ptr, class_idx);
|
||||||
|
FREE_PATH_STAT_INC(smallheap_v7_fast);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SMALL_ROUTE_V7: {
|
||||||
|
// Phase v7: SmallObject v7 free (research box)
|
||||||
|
if (small_heap_free_fast_v7_stub(ptr, (uint8_t)class_idx)) {
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_v7);
|
||||||
|
FREE_PATH_STAT_INC(smallheap_v7_fast);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// V7 miss → fallback to cold path
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SMALL_ROUTE_MID_V3: {
|
||||||
|
// Phase MID-V3: delegate to MID v3.5
|
||||||
|
// (same body as the SMALL_ROUTE_MID_V35 case, including the hot_mid_v35 counter)
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_mid_v35);
|
||||||
|
small_mid_v35_free(ptr, class_idx);
|
||||||
|
FREE_PATH_STAT_INC(smallheap_v7_fast);
|
||||||
|
FREE_TINY_FAST_HOTCOLD_STAT_INC(hot_hit);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
case SMALL_ROUTE_LEGACY:
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
cold_path:
|
||||||
|
// Delegate to cold path for cross-thread, TinyHeap, and legacy handling
|
||||||
|
return free_tiny_fast_cold(ptr, base, class_idx);
|
||||||
|
|
||||||
|
#else
|
||||||
|
// No header mode - fall back to normal free
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Phase 26-B: free_tiny_fast() - Ultra-thin Tiny deallocation
|
// Phase 26-B: free_tiny_fast() - Ultra-thin Tiny deallocation
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user