Phase 74-3: P0 (FASTAPI) - Free-path branch reduction optimization

Implements FASTAPI pattern to move enabled/init/stats checks outside hot loop:
- Added unified_cache_push_fast() fast-path API (no precondition checks)
- Modified tiny_legacy_fallback_free_base_with_env() to use FASTAPI with ENV gate
- Single boundary point: falls back to the slow path if the cache is FULL or FASTAPI is disabled
- ENV gate: HAKMEM_TINY_UC_FASTAPI=0/1 (default 0, research box)

Results (10-run Mixed SSOT, WS=400):
- Throughput: +0.32% (NEUTRAL, below +1.0% GO threshold)
- cache-misses: -16.31% (positive signal, but throughput gains insufficient)

Status: NEUTRAL, P0 FROZEN (insufficient ROI for structural change)
Next: Phase 74-4 (P2: Hot-class Inline Slots) pending per-class analysis

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-18 08:07:46 +09:00
parent e9b97e9d8e
commit 65f982aeec
3 changed files with 73 additions and 0 deletions

View File

@ -11,6 +11,7 @@
#include "tiny_front_hot_box.h"
#include "tiny_metadata_cache_env_box.h" // Phase 3 C2: Metadata cache ENV gate
#include "hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation
#include "tiny_unified_cache_fastapi_env_box.h" // Phase 74-3: FASTAPI ENV gate
// Purpose: Encapsulate legacy free logic (shared by multiple paths)
// Called by: malloc_tiny_fast.h (free path) + tiny_c6_ultra_free_box.c (C6 fallback)
@ -40,6 +41,27 @@ static inline void tiny_legacy_fallback_free_base_with_env(void* base, uint32_t
// Legacy fallback - Unified Cache push
if (!front_snap || front_snap->unified_cache_on) {
// Phase 74-3 (P0): FASTAPI path (ENV-gated)
if (tiny_uc_fastapi_enabled()) {
// Preconditions guaranteed:
// - unified_cache_on == true, or no front snapshot was supplied (checked above)
// - TLS init guaranteed by front_gate_unified_enabled() in malloc_tiny_fast.h
// - Stats compiled-out in FAST builds
if (unified_cache_push_fast(class_idx, HAK_BASE_FROM_RAW(base))) {
FREE_PATH_STAT_INC(legacy_fallback);
// Per-class breakdown (Phase 4-1)
if (__builtin_expect(free_path_stats_enabled(), 0)) {
if (class_idx < 8) {
g_free_path_stats.legacy_by_class[class_idx]++;
}
}
return;
}
// FULL → fallback to slow path (rare)
}
// Original path (FASTAPI=0 or fallback)
if (unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base))) {
FREE_PATH_STAT_INC(legacy_fallback);

View File

@ -0,0 +1,18 @@
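// ENV gate for the Phase 74-3 FASTAPI optimization
// HAKMEM_TINY_UC_FASTAPI=0/1 (default 0: unset or empty means disabled;
// any value whose first character is not '0' enables it)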
#ifndef HAK_BOX_TINY_UNIFIED_CACHE_FASTAPI_ENV_BOX_H
#define HAK_BOX_TINY_UNIFIED_CACHE_FASTAPI_ENV_BOX_H
#include <stdlib.h>
// Reports whether the FASTAPI push path is switched on.
// The HAKMEM_TINY_UC_FASTAPI environment variable is read on the first call
// and the decision is cached in a function-local static; later calls return
// the cached value without touching the environment again.
// Returns: 1 = enabled, 0 = disabled (variable unset, empty, or starting with '0').
static inline int tiny_uc_fastapi_enabled(void) {
    // Tri-state cache: -1 = not resolved yet, 0 = off, 1 = on.
    static int cached_gate = -1;

    // Common case: the gate has already been resolved.
    if (__builtin_expect(cached_gate != -1, 1)) {
        return cached_gate;
    }

    // First call: only the first character of the value is inspected.
    const char* raw = getenv("HAKMEM_TINY_UC_FASTAPI");
    int on = 0;
    if (raw != NULL && raw[0] != '\0' && raw[0] != '0') {
        on = 1;
    }
    cached_gate = on;
    return on;
}
#endif

View File

@ -294,6 +294,39 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
#endif // HAKMEM_TINY_UC_LOCALIZE_COMPILED
}
// ============================================================================
// Phase 74-3 (P0): FASTAPI - Fast-path push (assumes preconditions met)
// ============================================================================
// Appends one block to the ring of g_unified_cache[class_idx] without
// re-validating anything; the ring keeps one slot empty so that "full" and
// "empty" can be told apart.
//
// Caller contract (none of this is verified here):
//   - Unified cache is enabled (TINY_FRONT_UNIFIED_CACHE_ENABLED == 1)
//   - TLS cache is initialized (cache->slots != NULL)
//   - Stats are compiled-out or the caller doesn't need them
//   - class_idx is a valid index into g_unified_cache (it is used unchecked)
//
// Returns: 1=SUCCESS, 0=FULL (same convention as unified_cache_push)
static inline int unified_cache_push_fast(int class_idx, hak_base_ptr_t base) {
    void* raw_ptr = HAK_BASE_TO_RAW(base);
    TinyUnifiedCache* uc = &g_unified_cache[class_idx];

    // Index the tail would move to after this push (wraps via the mask).
    uint16_t succ = (uc->tail + 1) & uc->mask;

    // Expected case: the advanced tail does not collide with head, so there is room.
    if (__builtin_expect(succ != uc->head, 1)) {
        // Store at the current tail first, then publish the new tail.
        uc->slots[uc->tail] = raw_ptr;
        uc->tail = succ;
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
        g_unified_cache_push[class_idx]++;
#endif
        return 1; // SUCCESS (FASTAPI path)
    }

    // Ring is full: nothing was stored; the caller takes the slow path.
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
    g_unified_cache_full[class_idx]++;
#endif
    return 0; // Full
}
// ============================================================================
// Phase 23-D: Self-Contained Pop-or-Refill (tcache-style, single-layer)
// ============================================================================