2025-11-07 01:27:04 +09:00
|
|
|
|
// hak_free_api.inc.h — Box: hak_free_at() implementation
|
2025-11-15 23:00:21 +09:00
|
|
|
|
// Phase 15: Box Separation - One-way routing (FG → Domain boxes → ExternalGuard)
|
2025-11-07 01:27:04 +09:00
|
|
|
|
#ifndef HAK_FREE_API_INC_H
|
|
|
|
|
|
#define HAK_FREE_API_INC_H
|
|
|
|
|
|
|
2025-12-16 15:01:56 +09:00
|
|
|
|
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
|
2025-11-14 06:09:02 +09:00
|
|
|
|
#include <sys/mman.h> // For mincore() in AllocHeader safety check
|
2025-11-07 17:34:24 +09:00
|
|
|
|
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
|
2025-11-11 00:02:24 +09:00
|
|
|
|
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
|
2025-12-04 16:21:54 +09:00
|
|
|
|
#include "../hakmem_trace_master.h" // Unified trace control (HAKMEM_TRACE + per-feature ENV)
|
2025-11-15 23:00:21 +09:00
|
|
|
|
#include "front_gate_v2.h" // Phase 15: Box FG V2 - 1-byte header classification
|
|
|
|
|
|
#include "external_guard_box.h" // Phase 15: Box ExternalGuard - mincore (ENV controlled)
|
2025-12-01 16:05:55 +09:00
|
|
|
|
#include "fg_tiny_gate_box.h" // Tiny gate guard box (Superslab check)
|
2025-12-04 11:58:37 +09:00
|
|
|
|
#include "tiny_free_gate_box.h" // Tiny Free Gatekeeper Box (USER→Fast Path 境界)
|
Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。
**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)
**計測結果**:
Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)
C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)
**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討
**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00
|
|
|
|
#include "free_dispatch_stats_box.h" // Phase FREE-DISPATCHER-OPT-1: free dispatcher stats
|
2025-12-12 01:04:55 +09:00
|
|
|
|
#include "region_id_v6_box.h" // Phase MID-V3: RegionIdBox for ownership lookup
|
|
|
|
|
|
#include "mid_hotbox_v3_box.h" // Phase MID-V3: Mid/Pool HotBox v3 types
|
|
|
|
|
|
#include "mid_hotbox_v3_env_box.h" // Phase MID-V3: ENV gate for v3
|
2025-11-07 17:34:24 +09:00
|
|
|
|
|
2025-11-08 23:53:25 +09:00
|
|
|
|
#ifdef HAKMEM_POOL_TLS_PHASE1
|
|
|
|
|
|
#include "../pool_tls.h"
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
2025-11-29 14:39:07 +09:00
|
|
|
|
#include "mid_large_config_box.h" // Phase 5-Step3: Compile-time config for Mid/Large
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
// Optional route trace: print first N classification lines when enabled by env
|
2025-11-13 13:32:58 +09:00
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-07 01:27:04 +09:00
|
|
|
|
// Returns non-zero when free-route tracing is switched on.
//
// The answer comes from hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free")
// (unified trace: HAKMEM_FREE_ROUTE_TRACE or HAKMEM_TRACE=free) and is cached
// in a function-local static, so the check is only repeated while the cached
// value is still the -1 "unresolved" sentinel.
// The cache is a plain int, not an atomic.
static inline int hak_free_route_trace_on(void) {
    static int g_trace = -1;  // -1 = not resolved yet
    if (__builtin_expect(g_trace == -1, 0)) {
        g_trace = hak_trace_check("HAKMEM_FREE_ROUTE_TRACE", "free");
    }
    return g_trace;
}
|
|
|
|
|
|
// Returns the address of the remaining free-route trace budget.
//
// The counter starts at 32 (only the first 32 frees are logged) and lives in a
// function-local static, so every call hands back the same address. Callers
// read and decrement it through the returned pointer; it is a plain int.
static inline int* hak_free_route_budget_ptr(void) {
    static int g_budget = 32;  // first 32 frees only
    return &g_budget;
}
|
|
|
|
|
|
// Debug-build route logger: writes one "[FREE_ROUTE] <tag> ptr=<p>" line to
// stderr for a classified free.
//
//   tag  label printed with "%s" (must be a valid C string)
//   p    pointer being freed; only printed, never dereferenced
//
// Nothing is printed unless hak_free_route_trace_on() is non-zero, and output
// stops once the counter behind hak_free_route_budget_ptr() reaches zero.
// The budget is decremented with a plain (non-atomic) update.
static inline void hak_free_route_log(const char* tag, void* p) {
    if (!hak_free_route_trace_on()) return;
    int* budget = hak_free_route_budget_ptr();
    if (*budget <= 0) return;  // budget exhausted: stay silent
    (*budget)--;
    fprintf(stderr, "[FREE_ROUTE] %s ptr=%p\n", tag, p);
}
|
2025-11-13 13:32:58 +09:00
|
|
|
|
#else
|
|
|
|
|
|
// Release-build variant (the #else side of !HAKMEM_BUILD_RELEASE): route
// logging is compiled out. Both arguments are consumed only so that they do
// not trigger unused-parameter warnings.
static inline void hak_free_route_log(const char* tag, void* p) {
    (void)tag;
    (void)p;
}
|
|
|
|
|
|
#endif
|
2025-11-07 01:27:04 +09:00
|
|
|
|
|
2025-11-07 17:34:24 +09:00
|
|
|
|
// Optional: request-trace for invalid-magic cases (first N hits)
|
|
|
|
|
|
// Returns non-zero when request tracing for invalid-magic cases is enabled.
//
// The answer comes from hak_trace_check("HAKMEM_SUPER_REG_REQTRACE",
// "registry") (unified trace: HAKMEM_SUPER_REG_REQTRACE or
// HAKMEM_TRACE=registry) and is cached in a function-local static; the check
// is only repeated while the cached value is still the -1 sentinel.
// The cache is a plain int, not an atomic.
static inline int hak_super_reg_reqtrace_on(void) {
    static int g_on = -1;  // -1 = not resolved yet
    if (__builtin_expect(g_on == -1, 0)) {
        g_on = hak_trace_check("HAKMEM_SUPER_REG_REQTRACE", "registry");
    }
    return g_on;
}
|
|
|
|
|
|
// Returns the address of the remaining request-trace budget.
//
// The counter starts at 16 (only the first 16 occurrences are traced) and
// lives in a function-local static, so every call hands back the same
// address. Callers read and decrement it through the returned pointer; it is
// a plain int.
static inline int* hak_super_reg_reqtrace_budget_ptr(void) {
    static int g_budget = 16;  // trace first 16 occurrences
    return &g_budget;
}
|
|
|
|
|
|
static inline void hak_super_reg_reqtrace_dump(void* ptr) {
|
|
|
|
|
|
if (!hak_super_reg_reqtrace_on()) return;
|
|
|
|
|
|
int* b = hak_super_reg_reqtrace_budget_ptr();
|
|
|
|
|
|
if (*b <= 0) return;
|
|
|
|
|
|
(*b)--;
|
|
|
|
|
|
uintptr_t p = (uintptr_t)ptr;
|
|
|
|
|
|
uintptr_t m20 = ((uintptr_t)1 << 20) - 1;
|
|
|
|
|
|
uintptr_t m21 = ((uintptr_t)1 << 21) - 1;
|
|
|
|
|
|
SuperSlab* s20 = (SuperSlab*)(p & ~m20);
|
|
|
|
|
|
SuperSlab* s21 = (SuperSlab*)(p & ~m21);
|
|
|
|
|
|
unsigned long long mg20 = 0, mg21 = 0;
|
|
|
|
|
|
// Best-effort reads (may be unmapped; wrap in volatile access)
|
|
|
|
|
|
mg20 = (unsigned long long)(s20 ? s20->magic : 0);
|
|
|
|
|
|
mg21 = (unsigned long long)(s21 ? s21->magic : 0);
|
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
|
"[SUPER_REG_REQTRACE] ptr=%p base1M=%p magic1M=0x%llx base2M=%p magic2M=0x%llx\n",
|
|
|
|
|
|
ptr, (void*)s20, mg20, (void*)s21, mg21);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
#ifndef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
|
|
|
|
|
__attribute__((always_inline))
|
|
|
|
|
|
inline
|
|
|
|
|
|
#endif
|
|
|
|
|
|
void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。
**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)
**計測結果**:
Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)
C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)
**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討
**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00
|
|
|
|
// Phase FREE-DISPATCHER-OPT-1: Total call counter (at function entry)
|
|
|
|
|
|
FREE_DISPATCH_STAT_INC(total_calls);
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
#if HAKMEM_DEBUG_TIMING
|
|
|
|
|
|
HKM_TIME_START(t0);
|
|
|
|
|
|
#endif
|
2025-12-03 20:42:28 +09:00
|
|
|
|
static _Atomic int g_hak_free_at_trace = 0;
|
|
|
|
|
|
if (atomic_fetch_add_explicit(&g_hak_free_at_trace, 1, memory_order_relaxed) < 128) {
|
|
|
|
|
|
HAK_TRACE("[hak_free_at_enter]\n");
|
|
|
|
|
|
}
|
2025-11-07 01:27:04 +09:00
|
|
|
|
(void)site; (void)size;
|
2025-12-01 16:05:55 +09:00
|
|
|
|
int fg_misclass = 0; // Set when FG said Tiny but registry rejects
|
2025-11-10 18:21:32 +09:00
|
|
|
|
// Optional lightweight trace of early free calls (first few only)
|
2025-11-13 13:32:58 +09:00
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-10 18:21:32 +09:00
|
|
|
|
static int free_trace_en = -1; static _Atomic int free_trace_count = 0;
|
|
|
|
|
|
if (__builtin_expect(free_trace_en == -1, 0)) {
|
2025-12-04 16:21:54 +09:00
|
|
|
|
// Unified trace: HAKMEM_FREE_WRAP_TRACE or HAKMEM_TRACE=free
|
|
|
|
|
|
free_trace_en = hak_trace_check("HAKMEM_FREE_WRAP_TRACE", "free");
|
2025-11-10 18:21:32 +09:00
|
|
|
|
}
|
|
|
|
|
|
if (free_trace_en) {
|
|
|
|
|
|
int n = atomic_fetch_add(&free_trace_count, 1);
|
|
|
|
|
|
if (n < 8) {
|
|
|
|
|
|
fprintf(stderr, "[FREE_WRAP_ENTER] ptr=%p\n", ptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-11-13 13:32:58 +09:00
|
|
|
|
#endif
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
|
// Bench-only ultra-short path: try header-based tiny fast free first
|
|
|
|
|
|
// Enable with: HAKMEM_BENCH_FAST_FRONT=1
|
2025-12-16 15:01:56 +09:00
|
|
|
|
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
|
|
|
|
|
|
#if !HAKMEM_BENCH_MINIMAL
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
|
{
|
|
|
|
|
|
static int g_bench_fast_front = -1;
|
|
|
|
|
|
if (__builtin_expect(g_bench_fast_front == -1, 0)) {
|
|
|
|
|
|
const char* e = getenv("HAKMEM_BENCH_FAST_FRONT");
|
|
|
|
|
|
g_bench_fast_front = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
|
if (__builtin_expect(g_bench_fast_front && ptr != NULL, 0)) {
|
2025-12-04 11:58:37 +09:00
|
|
|
|
if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
|
#if HAKMEM_DEBUG_TIMING
|
|
|
|
|
|
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
}
|
2025-12-16 15:01:56 +09:00
|
|
|
|
#endif
|
Front-Direct implementation: SS→FC direct refill + SLL complete bypass
## Summary
Implemented Front-Direct architecture with complete SLL bypass:
- Direct SuperSlab → FastCache refill (1-hop, bypasses SLL)
- SLL-free allocation/free paths when Front-Direct enabled
- Legacy path sealing (SLL inline opt-in, SFC cascade ENV-only)
## New Modules
- core/refill/ss_refill_fc.h (236 lines): Standard SS→FC refill entry point
- Remote drain → Freelist → Carve priority
- Header restoration for C1-C6 (NOT C0/C7)
- ENV: HAKMEM_TINY_P0_DRAIN_THRESH, HAKMEM_TINY_P0_NO_DRAIN
- core/front/fast_cache.h: FastCache (L1) type definition
- core/front/quick_slot.h: QuickSlot (L0) type definition
## Allocation Path (core/tiny_alloc_fast.inc.h)
- Added s_front_direct_alloc TLS flag (lazy ENV check)
- SLL pop guarded by: g_tls_sll_enable && !s_front_direct_alloc
- Refill dispatch:
- Front-Direct: ss_refill_fc_fill() → fastcache_pop() (1-hop)
- Legacy: sll_refill_batch_from_ss() → SLL → FC (2-hop, A/B only)
- SLL inline pop sealed (requires HAKMEM_TINY_INLINE_SLL=1 opt-in)
## Free Path (core/hakmem_tiny_free.inc, core/hakmem_tiny_fastcache.inc.h)
- FC priority: Try fastcache_push() first (same-thread free)
- tiny_fast_push() bypass: Returns 0 when s_front_direct_free || !g_tls_sll_enable
- Fallback: Magazine/slow path (safe, bypasses SLL)
## Legacy Sealing
- SFC cascade: Default OFF (ENV-only via HAKMEM_TINY_SFC_CASCADE=1)
- Deleted: core/hakmem_tiny_free.inc.bak, core/pool_refill_legacy.c.bak
- Documentation: ss_refill_fc_fill() promoted as CANONICAL refill entry
## ENV Controls
- HAKMEM_TINY_FRONT_DIRECT=1: Enable Front-Direct (SS→FC direct)
- HAKMEM_TINY_P0_DIRECT_FC_ALL=1: Same as above (alt name)
- HAKMEM_TINY_REFILL_BATCH=1: Enable batch refill (also enables Front-Direct)
- HAKMEM_TINY_SFC_CASCADE=1: Enable SFC cascade (default OFF)
- HAKMEM_TINY_INLINE_SLL=1: Enable inline SLL pop (default OFF, requires AGGRESSIVE_INLINE)
## Benchmarks (Front-Direct Enabled)
```bash
ENV: HAKMEM_BENCH_FAST_FRONT=1 HAKMEM_TINY_FRONT_DIRECT=1
HAKMEM_TINY_REFILL_BATCH=1 HAKMEM_TINY_P0_DIRECT_FC_ALL=1
HAKMEM_TINY_REFILL_COUNT_HOT=256 HAKMEM_TINY_REFILL_COUNT_MID=96
HAKMEM_TINY_BUMP_CHUNK=256
bench_random_mixed (16-1040B random, 200K iter):
256 slots: 1.44M ops/s (STABLE, 0 SEGV)
128 slots: 1.44M ops/s (STABLE, 0 SEGV)
bench_fixed_size (fixed size, 200K iter):
256B: 4.06M ops/s (has debug logs, expected >10M without logs)
128B: Similar (debug logs affect)
```
## Verification
- TRACE_RING test (10K iter): **0 SLL events** detected ✅
- Complete SLL bypass confirmed when Front-Direct=1
- Stable execution: 200K iterations × multiple sizes, 0 SEGV
## Next Steps
- Disable debug logs in hak_alloc_api.inc.h (call_num 14250-14280 range)
- Re-benchmark with clean Release build (target: 10-15M ops/s)
- 128/256B shortcut path optimization (FC hit rate improvement)
Co-Authored-By: ChatGPT <chatgpt@openai.com>
Suggested-By: ultrathink
2025-11-14 05:41:49 +09:00
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
if (!ptr) {
|
|
|
|
|
|
#if HAKMEM_DEBUG_TIMING
|
|
|
|
|
|
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-15 23:00:21 +09:00
|
|
|
|
// ========== Phase 15: Box FG V2 Classification ==========
|
|
|
|
|
|
// One-way routing: FG → Domain boxes → ExternalGuard
|
|
|
|
|
|
// Box FG V2: Ultra-fast 1-byte header classification (no mincore, no registry)
|
|
|
|
|
|
fg_classification_t fg = fg_classify_domain(ptr);
|
|
|
|
|
|
hak_free_route_log(fg_domain_name(fg.domain), ptr);
|
2025-11-10 16:48:20 +09:00
|
|
|
|
|
2025-12-01 16:05:55 +09:00
|
|
|
|
// Fail-Fast: a Tiny classification requires the pointer to be registered in a SuperSlab; if it is not, hand it back as MIDCAND (boxed guard).
|
|
|
|
|
|
fg_tiny_gate_result_t fg_guard = fg_tiny_gate(ptr, fg);
|
|
|
|
|
|
fg = fg_guard.fg;
|
|
|
|
|
|
fg_misclass = fg_guard.misclassified;
|
|
|
|
|
|
|
Phase FREE-DISPATCHER-OPT-1: free dispatcher 統計計測
**目的**: free dispatcher(29%)の内訳を細分化して計測。
**実装内容**:
- FreeDispatchStats 構造体追加(ENV: HAKMEM_FREE_DISPATCH_STATS, default 0)
- カウンタ: total_calls / domain (tiny/mid/large) / route (ultra/legacy/pool/v6) / env_checks / route_for_class_calls
- hak_free_at / tiny_route_for_class / tiny_route_snapshot_init にカウンタ埋め込み
- 挙動変更なし(計測のみ、ENV OFF 時は overhead ゼロ)
**計測結果**:
Mixed 16-1024B (1M iter, ws=400):
- total=8,081, route_calls=267,967, env_checks=9
- BENCH_FAST_FRONT により大半は早期リターン
- route_for_class は主に alloc 側で呼ばれる(267k calls vs 8k frees)
- ENV check は初期化時の 9回のみ(snapshot 効果)
C6-heavy (257-768B, 1M iter, ws=400):
- total=500,099, route_calls=1,034, env_checks=9
- fg_classify_domain に到達する free が多い
- route_for_class 呼び出しは極小(snapshot 効果)
**結論**:
- ENV check は既に十分最適化されている(初期化時のみ)
- route_for_class は alloc 側での呼び出しが主で、free 側は snapshot で O(1)
- 次フェーズ(OPT-2)では別のアプローチを検討
**ドキュメント追加**:
- docs/analysis/FREE_DISPATCHER_ANALYSIS.md(新規)
- CURRENT_TASK.md に Phase FREE-DISPATCHER-OPT-1 セクション追加
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 21:21:40 +09:00
|
|
|
|
// Phase FREE-DISPATCHER-OPT-1: Domain classification counters
|
|
|
|
|
|
if (__builtin_expect(free_dispatch_stats_enabled(), 0)) {
|
|
|
|
|
|
switch (fg.domain) {
|
|
|
|
|
|
case FG_DOMAIN_TINY:
|
|
|
|
|
|
g_free_dispatch_stats.domain_tiny++;
|
|
|
|
|
|
break;
|
|
|
|
|
|
case FG_DOMAIN_MIDCAND:
|
|
|
|
|
|
case FG_DOMAIN_POOL:
|
|
|
|
|
|
g_free_dispatch_stats.domain_mid++;
|
|
|
|
|
|
break;
|
|
|
|
|
|
case FG_DOMAIN_EXTERNAL:
|
|
|
|
|
|
g_free_dispatch_stats.domain_large++;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-15 23:00:21 +09:00
|
|
|
|
switch (fg.domain) {
|
|
|
|
|
|
case FG_DOMAIN_TINY: {
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
|
// Phase FREE-FRONT-V3-2: v3 snapshot routing (optional, default OFF)
|
|
|
|
|
|
// Optimized: No tiny_route_for_class() calls, no redundant ENV checks
|
2025-12-16 15:01:56 +09:00
|
|
|
|
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
|
|
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX && !HAKMEM_BENCH_MINIMAL
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
|
{
|
|
|
|
|
|
// Check if v3 snapshot routing is enabled (cached)
|
|
|
|
|
|
static int g_v3_enabled = -1;
|
|
|
|
|
|
if (__builtin_expect(g_v3_enabled == -1, 0)) {
|
|
|
|
|
|
// For now, v3 snapshot routing is DISABLED by default (experimental)
|
|
|
|
|
|
// Phase v3-2 infrastructure is ready but not yet integrated
|
|
|
|
|
|
g_v3_enabled = 0; // TODO: Enable when ready: free_front_v3_enabled() ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Note: v3 snapshot path currently disabled (Phase v3-2 infrastructure only)
|
|
|
|
|
|
// When enabled, it would consolidate free routing logic and remove redundant
|
|
|
|
|
|
// ENV checks from the hot path. For now, use legacy routing below.
|
|
|
|
|
|
(void)g_v3_enabled; // Suppress unused variable warning
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
// Legacy path (default when v3 is OFF)
|
2025-11-15 23:00:21 +09:00
|
|
|
|
// Fast path: Tiny (C0-C7) with 1-byte header (0xa0 | class_idx)
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
2025-12-04 11:58:37 +09:00
|
|
|
|
if (__builtin_expect(tiny_free_gate_try_fast(ptr), 1)) {
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-10 16:48:20 +09:00
|
|
|
|
hak_free_v2_track_fast();
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#endif
|
2025-11-08 04:50:41 +09:00
|
|
|
|
goto done;
|
2025-11-08 03:46:35 +09:00
|
|
|
|
}
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
2025-11-10 16:48:20 +09:00
|
|
|
|
hak_free_v2_track_slow();
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#endif
|
|
|
|
|
|
#endif
|
2025-11-10 16:48:20 +09:00
|
|
|
|
hak_tiny_free(ptr);
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
2025-11-08 03:18:17 +09:00
|
|
|
|
|
2025-11-10 16:48:20 +09:00
|
|
|
|
#ifdef HAKMEM_POOL_TLS_PHASE1
|
2025-11-15 23:00:21 +09:00
|
|
|
|
case FG_DOMAIN_POOL: {
|
|
|
|
|
|
// Pool TLS: 8KB-52KB allocations with 1-byte header (0xb0 | class_idx)
|
2025-11-10 16:48:20 +09:00
|
|
|
|
pool_free(ptr);
|
|
|
|
|
|
goto done;
|
2025-11-07 01:27:04 +09:00
|
|
|
|
}
|
2025-11-08 03:18:17 +09:00
|
|
|
|
#endif
|
2025-11-07 01:27:04 +09:00
|
|
|
|
|
2025-12-10 09:08:18 +09:00
|
|
|
|
case FG_DOMAIN_POOL:
|
2025-11-15 23:00:21 +09:00
|
|
|
|
case FG_DOMAIN_MIDCAND:
|
|
|
|
|
|
case FG_DOMAIN_EXTERNAL:
|
|
|
|
|
|
// Fall through to registry lookup + AllocHeader dispatch
|
|
|
|
|
|
break;
|
2025-11-10 16:48:20 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ========== Slow Path: 16-byte AllocHeader Dispatch ==========
|
|
|
|
|
|
// Handle Mid/Large allocations (malloc/mmap/Pool/L25)
|
|
|
|
|
|
// Note: All Tiny allocations (C0-C7) already handled by Front Gate above
|
|
|
|
|
|
|
2025-11-15 23:00:21 +09:00
|
|
|
|
// ========== Mid/L25/Tiny Registry Lookup (Headerless) ==========
|
|
|
|
|
|
// MIDCAND: Could be Mid/Large/C7, needs registry lookup
|
2025-12-12 01:04:55 +09:00
|
|
|
|
|
2025-12-13 05:35:46 +09:00
|
|
|
|
// Phase FREE-DISPATCH-SSOT: Single Source of Truth for region lookup
|
|
|
|
|
|
// ENV: HAKMEM_FREE_DISPATCH_SSOT (default: 0 for backward compat, 1 for optimized)
|
|
|
|
|
|
// Problem: Old code did region_id_lookup TWICE in MID-V3 path (once inside mid_hot_v3_free, once after)
|
|
|
|
|
|
// Fix: Do lookup ONCE at top, dispatch based on kind
|
|
|
|
|
|
static int g_free_dispatch_ssot = -1;
|
|
|
|
|
|
if (__builtin_expect(g_free_dispatch_ssot == -1, 0)) {
|
|
|
|
|
|
const char* env = getenv("HAKMEM_FREE_DISPATCH_SSOT");
|
|
|
|
|
|
g_free_dispatch_ssot = (env && *env == '1') ? 1 : 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (g_free_dispatch_ssot && __builtin_expect(mid_v3_enabled(), 0)) {
|
|
|
|
|
|
// SSOT=1: Single lookup, then dispatch
|
|
|
|
|
|
extern RegionLookupV6 region_id_lookup_cached_v6(void* ptr);
|
|
|
|
|
|
RegionLookupV6 lk = region_id_lookup_cached_v6(ptr);
|
|
|
|
|
|
|
|
|
|
|
|
if (lk.kind == REGION_KIND_MID_V3) {
|
|
|
|
|
|
// Owned by MID-V3: call free handler directly (no internal lookup)
|
|
|
|
|
|
// Note: We pass the pre-looked-up info implicitly via TLS cache
|
|
|
|
|
|
mid_hot_v3_free(ptr);
|
|
|
|
|
|
|
|
|
|
|
|
if (mid_v3_debug_enabled()) {
|
|
|
|
|
|
static _Atomic int free_log_count = 0;
|
|
|
|
|
|
if (atomic_fetch_add(&free_log_count, 1) < 10) {
|
|
|
|
|
|
fprintf(stderr, "[MID_V3] Free SSOT: ptr=%p\n", ptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
// Not MID-V3: fall through to other dispatch paths below
|
|
|
|
|
|
} else if (__builtin_expect(mid_v3_enabled(), 0)) {
|
|
|
|
|
|
// SSOT=0: Legacy double-lookup path (for A/B comparison)
|
2025-12-12 01:04:55 +09:00
|
|
|
|
// RegionIdBox lookup to check if v3 owns this pointer
|
|
|
|
|
|
// mid_hot_v3_free() will check internally and return early if not owned
|
|
|
|
|
|
mid_hot_v3_free(ptr);
|
|
|
|
|
|
|
|
|
|
|
|
// Check if v3 actually owned it by doing a quick verification
|
|
|
|
|
|
// For safety, check ownership explicitly before continuing
|
|
|
|
|
|
// This prevents double-free if v3 handled it
|
|
|
|
|
|
extern RegionLookupV6 region_id_lookup_v6(void* ptr);
|
|
|
|
|
|
RegionLookupV6 lk = region_id_lookup_v6(ptr);
|
|
|
|
|
|
if (lk.kind == REGION_KIND_MID_V3) {
|
|
|
|
|
|
if (mid_v3_debug_enabled()) {
|
|
|
|
|
|
static _Atomic int free_log_count = 0;
|
|
|
|
|
|
if (atomic_fetch_add(&free_log_count, 1) < 10) {
|
|
|
|
|
|
fprintf(stderr, "[MID_V3] Free: ptr=%p\n", ptr);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
{
|
|
|
|
|
|
extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);
|
|
|
|
|
|
extern void hak_pool_free_fast(void* ptr, uintptr_t site_id);
|
2025-11-15 23:00:21 +09:00
|
|
|
|
size_t mid_sz = 0;
|
|
|
|
|
|
if (hak_pool_mid_lookup(ptr, &mid_sz)) {
|
|
|
|
|
|
hak_free_route_log("mid_hit", ptr);
|
|
|
|
|
|
hak_pool_free_fast(ptr, (uintptr_t)site);
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
2025-11-07 01:27:04 +09:00
|
|
|
|
}
|
|
|
|
|
|
{
|
|
|
|
|
|
extern int hak_l25_lookup(void* ptr, size_t* out_size);
|
|
|
|
|
|
extern void hak_l25_pool_free_fast(void* ptr, uintptr_t site_id);
|
2025-11-15 23:00:21 +09:00
|
|
|
|
size_t l25_sz = 0;
|
|
|
|
|
|
if (hak_l25_lookup(ptr, &l25_sz)) {
|
|
|
|
|
|
hak_free_route_log("l25_hit", ptr);
|
|
|
|
|
|
hkm_ace_stat_large_free();
|
|
|
|
|
|
hak_l25_pool_free_fast(ptr, (uintptr_t)site);
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
// PHASE 15: C7 (1KB headerless) registry lookup
|
|
|
|
|
|
// Box FG V2 cannot classify C7 (no header), so use registry
|
|
|
|
|
|
{
|
|
|
|
|
|
SuperSlab* ss = hak_super_lookup(ptr);
|
|
|
|
|
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
|
|
|
|
|
hak_free_route_log("tiny_c7_registry", ptr);
|
|
|
|
|
|
hak_tiny_free(ptr);
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
// Raw header dispatch (mmap/malloc/BigCache, etc.)
|
|
|
|
|
|
{
|
|
|
|
|
|
void* raw = (char*)ptr - HEADER_SIZE;
|
2025-11-07 17:34:24 +09:00
|
|
|
|
|
2025-11-29 09:04:32 +09:00
|
|
|
|
// Phase 3 (2025-11-29): mincore() completely removed
|
2025-11-14 06:32:38 +09:00
|
|
|
|
//
|
2025-11-29 09:04:32 +09:00
|
|
|
|
// History:
|
|
|
|
|
|
// - Phase 9: Originally used mincore() syscall to verify memory accessibility
|
|
|
|
|
|
// - 2025-11-14: Added DISABLE_MINCORE flag for performance (+10.3% improvement)
|
|
|
|
|
|
// - Phase 1b/2: Registry-based validation provides sufficient safety
|
|
|
|
|
|
// - Phase 3: Dead code removal - mincore no longer needed
|
|
|
|
|
|
//
|
|
|
|
|
|
// Safety: Trust internal metadata (registry/headers/FrontGate classification)
|
|
|
|
|
|
// - SuperSlab registry validates all Tiny allocations (Phase 1b/2)
|
|
|
|
|
|
// - Headers validate Mid/Large allocations
|
|
|
|
|
|
// - FrontGate classifier routes external allocations correctly
|
|
|
|
|
|
int is_mapped = 1;
|
2025-11-14 06:09:02 +09:00
|
|
|
|
|
|
|
|
|
|
if (!is_mapped) {
|
2025-11-07 17:34:24 +09:00
|
|
|
|
// Memory not accessible, ptr likely has no header
|
|
|
|
|
|
hak_free_route_log("unmapped_header_fallback", ptr);
|
2025-12-01 16:05:55 +09:00
|
|
|
|
// Always punt to libc; never route unmapped/unknown pointers to Tiny
|
2025-11-07 17:34:24 +09:00
|
|
|
|
extern void __libc_free(void*);
|
2025-11-11 00:02:24 +09:00
|
|
|
|
ptr_trace_dump_now("free_api_libc_invalid_hdr");
|
2025-11-07 17:34:24 +09:00
|
|
|
|
__libc_free(ptr);
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Safe to dereference header now
|
2025-11-07 01:27:04 +09:00
|
|
|
|
AllocHeader* hdr = (AllocHeader*)raw;
|
|
|
|
|
|
if (hdr->magic != HAKMEM_MAGIC) {
|
2025-11-07 17:34:24 +09:00
|
|
|
|
// CRITICAL FIX (2025-11-07): Invalid magic could mean:
|
|
|
|
|
|
// 1. Tiny allocation where SuperSlab lookup failed (NO header exists)
|
|
|
|
|
|
// 2. Libc allocation from mixed environment
|
|
|
|
|
|
// 3. Double-free or corrupted pointer
|
|
|
|
|
|
|
2025-11-07 01:27:04 +09:00
|
|
|
|
if (g_invalid_free_log) fprintf(stderr, "[hakmem] ERROR: Invalid magic 0x%X (expected 0x%X)\n", hdr->magic, HAKMEM_MAGIC);
|
2025-11-07 17:34:24 +09:00
|
|
|
|
|
|
|
|
|
|
// One-shot request-trace to help diagnose SS registry lookups
|
|
|
|
|
|
hak_super_reg_reqtrace_dump(ptr);
|
|
|
|
|
|
|
2025-12-01 16:05:55 +09:00
|
|
|
|
// Fail-fast diagnostics: never hand bad headers to Tiny or libc silently
|
|
|
|
|
|
SuperSlab* ss_diag = hak_super_lookup(ptr);
|
|
|
|
|
|
int slab_diag = ss_diag ? slab_index_for(ss_diag, ptr) : -1;
|
|
|
|
|
|
fprintf(stderr,
|
|
|
|
|
|
"[INVALID_MAGIC_FREE] ptr=%p magic=0x%X mode=%d ss=%p slab=%d\n",
|
|
|
|
|
|
ptr, hdr->magic, g_invalid_free_mode, (void*)ss_diag, slab_diag);
|
|
|
|
|
|
tiny_guard_on_invalid(ptr, hdr->magic);
|
|
|
|
|
|
|
|
|
|
|
|
// If this pointer was a misclassified Tiny header miss, log it and skip the free entirely (no libc call) to avoid corrupting TLS
|
|
|
|
|
|
if (fg_misclass) {
|
|
|
|
|
|
fprintf(stderr, "[FREE_MISCLASS_SKIP] ptr=%p hdr=0x%x (ignored to avoid corruption)\n",
|
|
|
|
|
|
ptr, hdr->magic);
|
|
|
|
|
|
goto done; // leak-safe skip: not our allocation
|
2025-11-07 17:34:24 +09:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-01 16:05:55 +09:00
|
|
|
|
// Never route invalid headers into Tiny; fail-fast by default
|
2025-11-07 17:34:24 +09:00
|
|
|
|
if (g_invalid_free_mode) {
|
|
|
|
|
|
static int leak_warn = 0;
|
|
|
|
|
|
if (!leak_warn) {
|
|
|
|
|
|
fprintf(stderr, "[hakmem] WARNING: Skipping free of invalid pointer %p (may leak memory)\n", ptr);
|
|
|
|
|
|
leak_warn = 1;
|
|
|
|
|
|
}
|
2025-12-01 16:05:55 +09:00
|
|
|
|
abort();
|
2025-11-07 17:34:24 +09:00
|
|
|
|
} else {
|
2025-12-01 16:05:55 +09:00
|
|
|
|
ptr_trace_dump_now("free_api_invalid_magic_failfast");
|
|
|
|
|
|
abort();
|
2025-11-07 17:34:24 +09:00
|
|
|
|
}
|
2025-11-07 01:27:04 +09:00
|
|
|
|
}
|
2025-11-29 14:39:07 +09:00
|
|
|
|
// Phase 5-Step3: Use Mid/Large Config Box (compile-time constant in PGO mode)
|
|
|
|
|
|
if (MID_LARGE_BIGCACHE_ENABLED && hdr->class_bytes >= 2097152) {
|
2025-11-07 01:27:04 +09:00
|
|
|
|
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
{
|
|
|
|
|
|
static int g_bc_l25_en_free = -1; if (g_bc_l25_en_free == -1) { const char* e = getenv("HAKMEM_BIGCACHE_L25"); g_bc_l25_en_free = (e && atoi(e) != 0) ? 1 : 0; }
|
2025-11-29 14:39:07 +09:00
|
|
|
|
if (g_bc_l25_en_free && MID_LARGE_BIGCACHE_ENABLED && hdr->size >= 524288 && hdr->size < 2097152) {
|
2025-11-07 01:27:04 +09:00
|
|
|
|
if (hak_bigcache_put(ptr, hdr->size, hdr->alloc_site)) goto done;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
switch (hdr->method) {
|
|
|
|
|
|
case ALLOC_METHOD_POOL: if (HAK_ENABLED_ALLOC(HAKMEM_FEATURE_POOL)) { hkm_ace_stat_mid_free(); hak_pool_free(ptr, hdr->size, hdr->alloc_site); goto done; } break;
|
|
|
|
|
|
case ALLOC_METHOD_L25_POOL: hkm_ace_stat_large_free(); hak_l25_pool_free(ptr, hdr->size, hdr->alloc_site); goto done;
|
2025-11-07 02:48:20 +09:00
|
|
|
|
case ALLOC_METHOD_MALLOC:
|
|
|
|
|
|
// CRITICAL FIX: raw was allocated with __libc_malloc, so free with __libc_free
|
|
|
|
|
|
// Using free(raw) would go through wrapper → infinite recursion
|
|
|
|
|
|
hak_free_route_log("malloc_hdr", ptr);
|
|
|
|
|
|
extern void __libc_free(void*);
|
2025-11-11 00:02:24 +09:00
|
|
|
|
ptr_trace_dump_now("free_api_libc_malloc_hdr");
|
2025-12-01 16:05:55 +09:00
|
|
|
|
fprintf(stderr, "[FREE_LIBC_HDR] raw=%p user=%p size=%zu method=%d magic=0x%X\n",
|
|
|
|
|
|
raw, ptr, hdr->size, (int)hdr->method, hdr->magic);
|
2025-11-07 02:48:20 +09:00
|
|
|
|
__libc_free(raw);
|
|
|
|
|
|
break;
|
2025-11-07 01:27:04 +09:00
|
|
|
|
case ALLOC_METHOD_MMAP:
|
|
|
|
|
|
#ifdef __linux__
|
|
|
|
|
|
if (HAK_ENABLED_MEMORY(HAKMEM_FEATURE_BATCH_MADVISE) && hdr->size >= BATCH_MIN_SIZE) { hak_batch_add(raw, hdr->size); goto done; }
|
|
|
|
|
|
if (hkm_whale_put(raw, hdr->size) != 0) { hkm_sys_munmap(raw, hdr->size); }
|
|
|
|
|
|
#else
|
2025-11-07 02:48:20 +09:00
|
|
|
|
// CRITICAL FIX: Same as ALLOC_METHOD_MALLOC
|
|
|
|
|
|
extern void __libc_free(void*);
|
2025-11-11 00:02:24 +09:00
|
|
|
|
ptr_trace_dump_now("free_api_libc_mmap_other");
|
2025-11-07 02:48:20 +09:00
|
|
|
|
__libc_free(raw);
|
2025-11-07 01:27:04 +09:00
|
|
|
|
#endif
|
|
|
|
|
|
break;
|
2025-11-11 01:47:06 +09:00
|
|
|
|
default: HAKMEM_LOG("ERROR: Unknown allocation method: %d\n", hdr->method); break;
|
2025-11-07 01:27:04 +09:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
done:
|
|
|
|
|
|
#if HAKMEM_DEBUG_TIMING
|
|
|
|
|
|
HKM_TIME_END(HKM_CAT_HAK_FREE, t0);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif // HAK_FREE_API_INC_H
|