Phase 35-39: FAST build optimization complete (+7.13% cumulative)

Phase 35-A: BENCH_MINIMAL gate function elimination (GO +4.39%)
- tiny_front_v3_enabled() → constant true
- tiny_metadata_cache_enabled() → constant 0
- learner_v7_enabled() → constant false
- small_learner_v2_enabled() → constant false

Phase 36: Policy snapshot init-once (GO +0.71%)
- small_policy_v7_snapshot() version check skip in BENCH_MINIMAL
- TLS cache for policy snapshot

Phase 37: Standard TLS cache (NO-GO -0.07%)
- TLS cache for Standard build attempted
- Runtime gate overhead negates benefit

Phase 38: FAST/OBSERVE/Standard workflow established
- make perf_fast, make perf_observe targets
- Scorecard and documentation updates

Phase 39: Hot path gate constantization (GO +1.98%)
- front_gate_unified_enabled() → constant 1
- alloc_dualhot_enabled() → constant 0
- g_bench_fast_front, g_v3_enabled blocks → compile-out
- free_dispatch_stats_enabled() → constant false

Results:
- FAST v3: 56.04M ops/s (47.4% of mimalloc)
- Standard: 53.50M ops/s (45.3% of mimalloc)
- M1 target (50% of mimalloc): FAST v3 still needs about +5.5% throughput (2.6 percentage points)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-16 15:01:56 +09:00
parent 506e724c3b
commit b7085c47e1
22 changed files with 1550 additions and 397 deletions

View File

@ -1,6 +1,7 @@
#ifndef HAKMEM_FREE_DISPATCH_STATS_BOX_H
#define HAKMEM_FREE_DISPATCH_STATS_BOX_H
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
@ -26,13 +27,18 @@ typedef struct FreeDispatchStats {
} FreeDispatchStats;
// ENV gate for the free-dispatch statistics.
// Phase 39: under HAKMEM_BENCH_MINIMAL the gate is a fixed `false` (GO +1.98%).
// Otherwise HAKMEM_FREE_DISPATCH_STATS is read with getenv() on the first call
// and the answer is cached in a function-local static: any non-empty value
// whose first character is not '0' turns the stats on.
static inline bool free_dispatch_stats_enabled(void) {
#if HAKMEM_BENCH_MINIMAL
    // FAST v3: constant-folded, stats are OFF.
    return false;
#else
    static int s_state = -1;  // -1 = not resolved yet, 0 = off, 1 = on
    if (__builtin_expect(s_state != -1, 1)) {
        return s_state;
    }
    const char* env = getenv("HAKMEM_FREE_DISPATCH_STATS");
    if (env != NULL && env[0] != '\0' && env[0] != '0') {
        s_state = 1;
    } else {
        s_state = 0;
    }
    return s_state;
#endif
}
// Global stats instance

View File

@ -3,6 +3,7 @@
#ifndef HAK_FREE_API_INC_H
#define HAK_FREE_API_INC_H
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
#include <sys/mman.h> // For mincore() in AllocHeader safety check
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
@ -112,6 +113,8 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
#endif
// Bench-only ultra-short path: try header-based tiny fast free first
// Enable with: HAKMEM_BENCH_FAST_FRONT=1
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
#if !HAKMEM_BENCH_MINIMAL
{
static int g_bench_fast_front = -1;
if (__builtin_expect(g_bench_fast_front == -1, 0)) {
@ -129,6 +132,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
}
#endif
}
#endif
if (!ptr) {
#if HAKMEM_DEBUG_TIMING
@ -168,7 +172,8 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
case FG_DOMAIN_TINY: {
// Phase FREE-FRONT-V3-2: v3 snapshot routing (optional, default OFF)
// Optimized: No tiny_route_for_class() calls, no redundant ENV checks
#if HAKMEM_TINY_HEADER_CLASSIDX
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
#if HAKMEM_TINY_HEADER_CLASSIDX && !HAKMEM_BENCH_MINIMAL
{
// Check if v3 snapshot routing is enabled (cached)
static int g_v3_enabled = -1;

View File

@ -0,0 +1,15 @@
// small_policy_snapshot_tls_box.c - Phase 37: Lightweight TLS cache implementation
#include <stdlib.h> // for NULL
#include "small_policy_snapshot_tls_box.h"
// Thread-local policy snapshot cache: `__thread` gives every thread its own copy.
// It starts out empty: not initialized, version 0, no cached pointer.
static __thread SmallPolicySnapshotTLSCache g_policy_snapshot_tls_cache = {
    .initialized = 0,
    .cached_version = 0,
    .cached_ptr = NULL
};

// Returns the address of the calling thread's cache instance.
SmallPolicySnapshotTLSCache* small_policy_snapshot_tls_get(void) {
    SmallPolicySnapshotTLSCache* self = &g_policy_snapshot_tls_cache;
    return self;
}

View File

@ -0,0 +1,81 @@
// small_policy_snapshot_tls_box.h - Phase 37: Lightweight TLS cache for policy snapshot
//
// Purpose:
// - Reduce fixed tax from global version read in small_policy_v7_snapshot()
// - Fast path: return cached pointer without global memory access
// - Slow path: refresh only when global version changes
//
// Box Theory:
// - Single Responsibility: TLS caching for policy snapshot
// - Reversible: ENV gate HAKMEM_POLICY_SNAPSHOT_TLS (default OFF; set to 1 to enable)
// - Clear Boundary: Only affects small_policy_v7_snapshot() internal
#ifndef SMALL_POLICY_SNAPSHOT_TLS_BOX_H
#define SMALL_POLICY_SNAPSHOT_TLS_BOX_H
#include "../hakmem_build_flags.h"
#include <stdlib.h> // for getenv
#include <stdint.h>
#include <stdbool.h>
// Forward declaration
struct SmallPolicyV7;
// TLS cache state for the policy snapshot.
// Written as a unit by small_policy_snapshot_tls_update() and tested by
// small_policy_snapshot_tls_check().
typedef struct SmallPolicySnapshotTLSCache {
const struct SmallPolicyV7* cached_ptr; // Cached policy pointer (stored by the update helper)
uint32_t cached_version; // Version value stored together with cached_ptr
int initialized; // 0 = not init, 1 = initialized
} SmallPolicySnapshotTLSCache;
// ENV gate for the TLS snapshot cache. Default is OFF (Phase 37 NO-GO: the
// TLS cache showed no benefit). Export HAKMEM_POLICY_SNAPSHOT_TLS=1 to turn it
// on (research only).
#if HAKMEM_BENCH_MINIMAL
// BENCH_MINIMAL: the gate is a constant 0, so the simpler Phase 36 path
// (version check skipped entirely) is always used.
static inline int policy_snapshot_tls_enabled(void) {
    return 0;
}
#else
// Runtime variant: the environment is read once, on the first call, and the
// result is kept in a function-local static. Only a value whose first
// character is '1' enables the cache; anything else (or unset) leaves it OFF.
static inline int policy_snapshot_tls_enabled(void) {
    static int s_gate = -1;  // -1 = not resolved yet
    if (__builtin_expect(s_gate == -1, 0)) {
        const char* env = getenv("HAKMEM_POLICY_SNAPSHOT_TLS");
        s_gate = (env != NULL && env[0] == '1') ? 1 : 0;
    }
    return s_gate;
}
#endif
// Get TLS cache (thread-local singleton).
// The returned pointer is what small_policy_snapshot_tls_check() and
// small_policy_snapshot_tls_update() take as their `cache` argument.
SmallPolicySnapshotTLSCache* small_policy_snapshot_tls_get(void);
// Validity test for the TLS cache (fast path: one flag test plus one version compare).
//   cache          - cache to inspect
//   global_version - version value to compare the cached one against
// Returns 1 when the cache is initialized and its stored version equals
// global_version (cached_ptr may be returned as-is), 0 when a refresh is needed.
static inline int small_policy_snapshot_tls_check(
    SmallPolicySnapshotTLSCache* cache,
    uint32_t global_version
) {
    const int hit = cache->initialized && (cache->cached_version == global_version);
    // A hit is hinted to the compiler as the expected outcome.
    return __builtin_expect(hit, 1) ? 1 : 0;
}
// Store a freshly obtained snapshot in the TLS cache.
//   cache   - cache to overwrite
//   ptr     - policy pointer to remember
//   version - version value to remember alongside ptr
// All three fields of the cache are rewritten and the cache is marked initialized.
static inline void small_policy_snapshot_tls_update(
    SmallPolicySnapshotTLSCache* cache,
    const struct SmallPolicyV7* ptr,
    uint32_t version
) {
    *cache = (SmallPolicySnapshotTLSCache){
        .cached_ptr = ptr,
        .cached_version = version,
        .initialized = 1
    };
}
#endif // SMALL_POLICY_SNAPSHOT_TLS_BOX_H

View File

@ -92,6 +92,8 @@ static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx)
return 0;
}
if (ctx->class_idx >= 0 && (uint8_t)ctx->class_idx != meta_cls) {
// Phase 34B: Compile-out alloc gate class mismatch counter (default OFF)
#if HAKMEM_ALLOC_GATE_CLS_MIS_COMPILED
static _Atomic uint32_t g_alloc_gate_cls_mis = 0;
uint32_t n = atomic_fetch_add_explicit(&g_alloc_gate_cls_mis, 1, memory_order_relaxed);
if (n < 8) {
@ -105,6 +107,9 @@ static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx)
info.slab_idx);
fflush(stderr);
}
#else
(void)0; // No-op when compiled out
#endif
// A class mismatch by itself does not fail fast; it is only logged (future insertion point for a Guard).
}

View File

@ -1,6 +1,7 @@
// tiny_front_v3_env_box.h - Tiny Front v3 ENV gate & snapshot (guard/UC/header)
#pragma once
#include "../hakmem_build_flags.h" // Phase 35-A: HAKMEM_BENCH_MINIMAL
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
@ -28,6 +29,12 @@ extern TinyFrontV3Snapshot g_tiny_front_v3_snapshot;
extern int g_tiny_front_v3_snapshot_ready;
// ENV gate: default ON (set HAKMEM_TINY_FRONT_V3_ENABLED=0 to disable)
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default ON)
#if HAKMEM_BENCH_MINIMAL
// BENCH_MINIMAL variant: a constant `true`, matching the default-ON behavior
// documented for the ENV gate.
static inline bool tiny_front_v3_enabled(void) {
return true; // Fixed ON in bench mode (default behavior)
}
#else
static inline bool tiny_front_v3_enabled(void) {
static int g_enable = -1;
if (__builtin_expect(g_enable == -1, 0)) {
@ -40,6 +47,7 @@ static inline bool tiny_front_v3_enabled(void) {
}
return g_enable != 0;
}
#endif
// Optional: size→class LUT gate (default ON, set HAKMEM_TINY_FRONT_V3_LUT_ENABLED=0 to disable)
static inline bool tiny_front_v3_lut_enabled(void) {

View File

@ -18,6 +18,12 @@
// Forward declare the learner enabled check (to avoid header conflicts)
extern bool small_learner_v2_enabled(void);
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default OFF)
#if HAKMEM_BENCH_MINIMAL
// BENCH_MINIMAL variant: a constant 0, so the metadata cache is reported as
// disabled.
static inline int tiny_metadata_cache_enabled(void) {
return 0; // Fixed OFF in bench mode (default behavior)
}
#else
static inline int tiny_metadata_cache_enabled(void) {
static int g = -1;
static int g_probe_left = 64; // tolerate early getenv() instability (bench_profile putenv)
@ -54,5 +60,6 @@ static inline int tiny_metadata_cache_enabled(void) {
g = 0;
return 0;
}
#endif
#endif // HAK_TINY_METADATA_CACHE_ENV_BOX_H