Phase 74-3: P0 (FASTAPI) - Free-path branch reduction optimization
Implements the FASTAPI pattern to move the enabled/init/stats checks outside the hot loop:
- Added unified_cache_push_fast(), a fast-path API that performs no precondition checks
- Modified tiny_legacy_fallback_free_base_with_env() to use FASTAPI behind an ENV gate
- Single boundary point: fall back to the slow path if the cache is FULL or FASTAPI is disabled
- ENV gate: HAKMEM_TINY_UC_FASTAPI=0/1 (default 0, research box)

Results (10-run Mixed SSOT, WS=400):
- Throughput: +0.32% (NEUTRAL, below the +1.0% GO threshold)
- cache-misses: -16.31% (positive signal, but throughput gains are insufficient)

Status: NEUTRAL, P0 FROZEN (insufficient ROI for a structural change)
Next: Phase 74-4 (P2: Hot-class Inline Slots), pending per-class analysis

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -11,6 +11,7 @@
|
||||
#include "tiny_front_hot_box.h"
|
||||
#include "tiny_metadata_cache_env_box.h" // Phase 3 C2: Metadata cache ENV gate
|
||||
#include "hakmem_env_snapshot_box.h" // Phase 4 E1: ENV snapshot consolidation
|
||||
#include "tiny_unified_cache_fastapi_env_box.h" // Phase 74-3: FASTAPI ENV gate
|
||||
|
||||
// Purpose: Encapsulate legacy free logic (shared by multiple paths)
|
||||
// Called by: malloc_tiny_fast.h (free path) + tiny_c6_ultra_free_box.c (C6 fallback)
|
||||
@ -40,6 +41,27 @@ static inline void tiny_legacy_fallback_free_base_with_env(void* base, uint32_t
|
||||
|
||||
// Legacy fallback - Unified Cache push
|
||||
if (!front_snap || front_snap->unified_cache_on) {
|
||||
// Phase 74-3 (P0): FASTAPI path (ENV-gated)
|
||||
if (tiny_uc_fastapi_enabled()) {
|
||||
// Preconditions guaranteed:
|
||||
// - unified_cache_on == true (checked above)
|
||||
// - TLS init guaranteed by front_gate_unified_enabled() in malloc_tiny_fast.h
|
||||
// - Stats compiled-out in FAST builds
|
||||
if (unified_cache_push_fast(class_idx, HAK_BASE_FROM_RAW(base))) {
|
||||
FREE_PATH_STAT_INC(legacy_fallback);
|
||||
|
||||
// Per-class breakdown (Phase 4-1)
|
||||
if (__builtin_expect(free_path_stats_enabled(), 0)) {
|
||||
if (class_idx < 8) {
|
||||
g_free_path_stats.legacy_by_class[class_idx]++;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
// FULL → fallback to slow path (rare)
|
||||
}
|
||||
|
||||
// Original path (FASTAPI=0 or fallback)
|
||||
if (unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base))) {
|
||||
FREE_PATH_STAT_INC(legacy_fallback);
|
||||
|
||||
|
||||
18
core/box/tiny_unified_cache_fastapi_env_box.h
Normal file
18
core/box/tiny_unified_cache_fastapi_env_box.h
Normal file
@ -0,0 +1,18 @@
|
||||
// ENV gate for FASTAPI optimization
|
||||
// HAKMEM_TINY_UC_FASTAPI=0/1 (default 0)
|
||||
|
||||
#ifndef HAK_BOX_TINY_UNIFIED_CACHE_FASTAPI_ENV_BOX_H
|
||||
#define HAK_BOX_TINY_UNIFIED_CACHE_FASTAPI_ENV_BOX_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
// Reports whether the FASTAPI unified-cache push path is switched on.
//
// HAKMEM_TINY_UC_FASTAPI is read from the environment on the first call and
// the decision is kept in a function-local static for every later call.
// The gate is ON only when the variable is set, non-empty, and its first
// character is not '0'; in every other case it is OFF.
//
// Returns: 1=enabled, 0=disabled
static inline int tiny_uc_fastapi_enabled(void) {
    static int cached_gate = -1;  // -1 = environment not consulted yet

    // Common case: the decision has already been resolved.
    if (__builtin_expect(cached_gate != -1, 1)) {
        return cached_gate;
    }

    // First call: resolve the gate from the environment.
    const char* value = getenv("HAKMEM_TINY_UC_FASTAPI");
    if (value == NULL || value[0] == '\0' || value[0] == '0') {
        cached_gate = 0;
    } else {
        cached_gate = 1;
    }
    return cached_gate;
}
|
||||
|
||||
#endif
|
||||
@ -294,6 +294,39 @@ static inline int unified_cache_push(int class_idx, hak_base_ptr_t base) {
|
||||
#endif // HAKMEM_TINY_UC_LOCALIZE_COMPILED
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 74-3 (P0): FASTAPI - Fast-path push (assumes preconditions met)
|
||||
// ============================================================================
|
||||
// Preconditions (caller must ensure):
|
||||
// - Unified cache is enabled (TINY_FRONT_UNIFIED_CACHE_ENABLED == 1)
|
||||
// - TLS cache is initialized (cache->slots != NULL)
|
||||
// - Stats are compiled-out or caller doesn't need them
|
||||
// Returns: 1=SUCCESS, 0=FULL (same as unified_cache_push)
|
||||
// Fast-path push into the per-class unified cache ring.
// Performs no enabled/init checks and no class_idx range check; the caller
// must have established those preconditions before calling.
//
// class_idx: index into g_unified_cache[]
// base:      block base pointer to store (converted with HAK_BASE_TO_RAW)
// Returns: 1=SUCCESS, 0=FULL
static inline int unified_cache_push_fast(int class_idx, hak_base_ptr_t base) {
    void* raw_ptr = HAK_BASE_TO_RAW(base);
    TinyUnifiedCache* uc = &g_unified_cache[class_idx];

    // Tail position after this push, wrapped with the ring mask.
    const uint16_t tail_after = (uint16_t)((uc->tail + 1) & uc->mask);

    // One slot is always left unused so that full and empty stay distinguishable:
    // the ring has room only while the advanced tail does not meet head.
    if (__builtin_expect(tail_after != uc->head, 1)) {
        // Store at the current tail, then publish the advanced tail.
        uc->slots[uc->tail] = raw_ptr;
        uc->tail = tail_after;

#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
        g_unified_cache_push[class_idx]++;
#endif

        return 1;  // SUCCESS (FASTAPI path)
    }

    // Ring is full: count the event (when stats are compiled in) and report it.
#if !HAKMEM_BUILD_RELEASE || HAKMEM_UNIFIED_CACHE_STATS_COMPILED
    g_unified_cache_full[class_idx]++;
#endif
    return 0;  // Full
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 23-D: Self-Contained Pop-or-Refill (tcache-style, single-layer)
|
||||
// ============================================================================
|
||||
|
||||
Reference in New Issue
Block a user