Phase ALLOC-GATE-OPT-1: tiny_alloc_gate_fast 統計計測
- AllocGateStats 構造体追加(size2class/route/env/class分布)
- malloc_tiny_fast にカウンタ埋め込み
- ENV: HAKMEM_ALLOC_GATE_STATS (default 0)
- 挙動変更なし(計測のみ)

計測結果:
- Mixed: total=542k, size2class=0, route_calls=0, env_checks=275k, C4-C7=95.2%
- size_to_class/route_for_class は完全削減済み(LUT 効果)
- C4-C7 が 95% → ULTRA fast path が有効
- env_checks ≈ c7_calls → C7 ULTRA の ENV gate が毎回呼ばれる
- C6-heavy: total=11 → malloc_tiny_fast はほぼ通らない(mid/pool 主体)

結論:
- alloc gate は既に十分最適化済み(LUT + ULTRA で削減済み)
- さらなる最適化余地は小さい(env_checks は軽量化済み、数%以下の効果)
- 次フェーズでは free dispatcher (29%) や C7 ULTRA refill (7%) など、他のボトルネックを狙う

詳細: docs/analysis/ALLOC_GATE_ANALYSIS.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,7 @@
|
||||
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
|
||||
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
|
||||
#include "../box/free_path_stats_box.h" // Phase FREE-LEGACY-BREAKDOWN-1: Free path stats
|
||||
#include "../box/alloc_gate_stats_box.h" // Phase ALLOC-GATE-OPT-1: Alloc gate stats
|
||||
|
||||
// Helper: current thread id (low 32 bits) for owner check
|
||||
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
||||
@@ -123,6 +124,9 @@ static inline int front_gate_unified_enabled(void) {
|
||||
//
|
||||
__attribute__((always_inline))
|
||||
static inline void* malloc_tiny_fast(size_t size) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (1. 関数入口)
|
||||
ALLOC_GATE_STAT_INC(total_calls);
|
||||
|
||||
const int front_v3_on = tiny_front_v3_enabled();
|
||||
const TinyFrontV3Snapshot* front_snap =
|
||||
__builtin_expect(front_v3_on, 0) ? tiny_front_v3_snapshot_get() : NULL;
|
||||
@@ -143,10 +147,14 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
}
|
||||
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (2. size→class 変換)
|
||||
ALLOC_GATE_STAT_INC(size_to_class_calls);
|
||||
class_idx = hak_tiny_size_to_class(size);
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
return NULL;
|
||||
}
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (3. route_for_class 呼び出し)
|
||||
ALLOC_GATE_STAT_INC(route_for_class_calls);
|
||||
route = tiny_route_for_class((uint8_t)class_idx);
|
||||
route_trusted = false;
|
||||
} else if (!route_trusted &&
|
||||
@@ -154,13 +162,20 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
route != TINY_ROUTE_HOTHEAP_V2 && route != TINY_ROUTE_SMALL_HEAP_V3 &&
|
||||
route != TINY_ROUTE_SMALL_HEAP_V4 && route != TINY_ROUTE_SMALL_HEAP_V5 &&
|
||||
route != TINY_ROUTE_SMALL_HEAP_V6) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (3. route_for_class 呼び出し)
|
||||
ALLOC_GATE_STAT_INC(route_for_class_calls);
|
||||
route = tiny_route_for_class((uint8_t)class_idx);
|
||||
}
|
||||
|
||||
tiny_front_alloc_stat_inc(class_idx);
|
||||
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (4. クラス別分布)
|
||||
ALLOC_GATE_STAT_INC_CLASS(class_idx);
|
||||
|
||||
// C7 ULTRA allocation path (ENV: HAKMEM_TINY_C7_ULTRA_ENABLED, default ON)
|
||||
if (tiny_class_is_c7(class_idx) && tiny_c7_ultra_enabled_env()) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (5. C7 ULTRA ENV check)
|
||||
ALLOC_GATE_STAT_INC(env_checks);
|
||||
void* ultra_p = tiny_c7_ultra_alloc(size);
|
||||
if (TINY_HOT_LIKELY(ultra_p != NULL)) {
|
||||
return ultra_p;
|
||||
@@ -170,6 +185,8 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
|
||||
// Phase 4-4: C6 ULTRA free+alloc integration (寄生型 TLS キャッシュ pop)
|
||||
if (tiny_class_is_c6(class_idx) && tiny_c6_ultra_free_enabled()) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (5. C6 ULTRA ENV check)
|
||||
ALLOC_GATE_STAT_INC(env_checks);
|
||||
TinyC6UltraFreeTLS* ctx = tiny_c6_ultra_free_tls();
|
||||
if (TINY_HOT_LIKELY(ctx->count > 0)) {
|
||||
void* base = ctx->freelist[--ctx->count];
|
||||
@@ -183,6 +200,8 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
|
||||
// Phase 5-2: C5 ULTRA free+alloc integration (same pattern as C6)
|
||||
if (tiny_class_is_c5(class_idx) && tiny_c5_ultra_free_enabled()) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (5. C5 ULTRA ENV check)
|
||||
ALLOC_GATE_STAT_INC(env_checks);
|
||||
TinyC5UltraFreeTLS* ctx = tiny_c5_ultra_free_tls();
|
||||
if (TINY_HOT_LIKELY(ctx->count > 0)) {
|
||||
void* base = ctx->freelist[--ctx->count];
|
||||
@@ -193,6 +212,8 @@ static inline void* malloc_tiny_fast(size_t size) {
|
||||
|
||||
// Phase 6: C4 ULTRA free+alloc integration (same pattern as C5/C6, cap=64)
|
||||
if (tiny_class_is_c4(class_idx) && tiny_c4_ultra_free_enabled()) {
|
||||
// Phase ALLOC-GATE-OPT-1: カウンタ散布 (5. C4 ULTRA ENV check)
|
||||
ALLOC_GATE_STAT_INC(env_checks);
|
||||
TinyC4UltraFreeTLS* ctx = tiny_c4_ultra_free_tls();
|
||||
if (TINY_HOT_LIKELY(ctx->count > 0)) {
|
||||
void* base = ctx->freelist[--ctx->count];
|
||||
|
||||
Reference in New Issue
Block a user