Phase 35-39: FAST build optimization complete (+7.13% cumulative)
Phase 35-A: BENCH_MINIMAL gate function elimination (GO +4.39%) - tiny_front_v3_enabled() → constant true - tiny_metadata_cache_enabled() → constant 0 - learner_v7_enabled() → constant false - small_learner_v2_enabled() → constant false Phase 36: Policy snapshot init-once (GO +0.71%) - small_policy_v7_snapshot() version check skip in BENCH_MINIMAL - TLS cache for policy snapshot Phase 37: Standard TLS cache (NO-GO -0.07%) - TLS cache for Standard build attempted - Runtime gate overhead negates benefit Phase 38: FAST/OBSERVE/Standard workflow established - make perf_fast, make perf_observe targets - Scorecard and documentation updates Phase 39: Hot path gate constantization (GO +1.98%) - front_gate_unified_enabled() → constant 1 - alloc_dualhot_enabled() → constant 0 - g_bench_fast_front, g_v3_enabled blocks → compile-out - free_dispatch_stats_enabled() → constant false Results: - FAST v3: 56.04M ops/s (47.4% of mimalloc) - Standard: 53.50M ops/s (45.3% of mimalloc) - M1 target (50%): 5.5% remaining 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
#ifndef HAKMEM_FREE_DISPATCH_STATS_BOX_H
|
||||
#define HAKMEM_FREE_DISPATCH_STATS_BOX_H
|
||||
|
||||
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
@ -26,13 +27,18 @@ typedef struct FreeDispatchStats {
|
||||
} FreeDispatchStats;
|
||||
|
||||
// ENV gate for free-dispatch statistics.
// Phase 39: under HAKMEM_BENCH_MINIMAL the gate is pinned to false (GO +1.98%).
//
// Otherwise HAKMEM_FREE_DISPATCH_STATS is read with getenv() on the first call
// and the decision is cached in a function-local static: a non-empty value
// whose first character is not '0' turns stats on; unset, empty, or a leading
// '0' leaves them off.
static inline bool free_dispatch_stats_enabled(void) {
#if HAKMEM_BENCH_MINIMAL
    return false;  // FAST v3: compile-time constant (stats OFF)
#else
    static int s_gate = -1;  // -1 = not resolved yet, 0 = off, 1 = on

    // Common case: the gate was already resolved by an earlier call.
    if (__builtin_expect(s_gate != -1, 1)) {
        return s_gate;
    }

    const char* value = getenv("HAKMEM_FREE_DISPATCH_STATS");
    if (value != NULL && value[0] != '\0' && value[0] != '0') {
        s_gate = 1;
    } else {
        s_gate = 0;
    }
    return s_gate;
#endif
}
|
||||
|
||||
// Global stats instance
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#ifndef HAK_FREE_API_INC_H
|
||||
#define HAK_FREE_API_INC_H
|
||||
|
||||
#include "../hakmem_build_flags.h" // Phase 39: HAKMEM_BENCH_MINIMAL (GO +1.98%)
|
||||
#include <sys/mman.h> // For mincore() in AllocHeader safety check
|
||||
#include "hakmem_tiny_superslab.h" // For SUPERSLAB_MAGIC, SuperSlab
|
||||
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
|
||||
@ -112,6 +113,8 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
#endif
|
||||
// Bench-only ultra-short path: try header-based tiny fast free first
|
||||
// Enable with: HAKMEM_BENCH_FAST_FRONT=1
|
||||
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
|
||||
#if !HAKMEM_BENCH_MINIMAL
|
||||
{
|
||||
static int g_bench_fast_front = -1;
|
||||
if (__builtin_expect(g_bench_fast_front == -1, 0)) {
|
||||
@ -129,6 +132,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!ptr) {
|
||||
#if HAKMEM_DEBUG_TIMING
|
||||
@ -168,7 +172,8 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
case FG_DOMAIN_TINY: {
|
||||
// Phase FREE-FRONT-V3-2: v3 snapshot routing (optional, default OFF)
|
||||
// Optimized: No tiny_route_for_class() calls, no redundant ENV checks
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// Phase 39: BENCH_MINIMAL → compile-out (GO +1.98%)
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX && !HAKMEM_BENCH_MINIMAL
|
||||
{
|
||||
// Check if v3 snapshot routing is enabled (cached)
|
||||
static int g_v3_enabled = -1;
|
||||
|
||||
15
core/box/small_policy_snapshot_tls_box.c
Normal file
15
core/box/small_policy_snapshot_tls_box.c
Normal file
@ -0,0 +1,15 @@
|
||||
// small_policy_snapshot_tls_box.c - Phase 37: Lightweight TLS cache implementation
|
||||
|
||||
#include <stdlib.h> // for NULL
|
||||
#include "small_policy_snapshot_tls_box.h"
|
||||
|
||||
// TLS singleton for policy snapshot cache
|
||||
static __thread SmallPolicySnapshotTLSCache g_policy_snapshot_tls_cache = {
|
||||
.cached_ptr = NULL,
|
||||
.cached_version = 0,
|
||||
.initialized = 0
|
||||
};
|
||||
|
||||
SmallPolicySnapshotTLSCache* small_policy_snapshot_tls_get(void) {
|
||||
return &g_policy_snapshot_tls_cache;
|
||||
}
|
||||
81
core/box/small_policy_snapshot_tls_box.h
Normal file
81
core/box/small_policy_snapshot_tls_box.h
Normal file
@ -0,0 +1,81 @@
|
||||
// small_policy_snapshot_tls_box.h - Phase 37: Lightweight TLS cache for policy snapshot
|
||||
//
|
||||
// Purpose:
|
||||
// - Reduce fixed tax from global version read in small_policy_v7_snapshot()
|
||||
// - Fast path: return cached pointer without global memory access
|
||||
// - Slow path: refresh only when global version changes
|
||||
//
|
||||
// Box Theory:
|
||||
// - Single Responsibility: TLS caching for policy snapshot
|
||||
// - Reversible: ENV gate HAKMEM_POLICY_SNAPSHOT_TLS (default OFF; set to 1 to enable)
|
||||
// - Clear Boundary: Only affects small_policy_v7_snapshot() internal
|
||||
|
||||
#ifndef SMALL_POLICY_SNAPSHOT_TLS_BOX_H
|
||||
#define SMALL_POLICY_SNAPSHOT_TLS_BOX_H
|
||||
|
||||
#include "../hakmem_build_flags.h"
|
||||
#include <stdlib.h> // for getenv
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// The policy type is only handled through a pointer here, so a forward
// declaration is enough.
struct SmallPolicyV7;

// State held by the policy-snapshot TLS cache.
typedef struct SmallPolicySnapshotTLSCache {
    // Cached policy pointer.
    const struct SmallPolicyV7* cached_ptr;
    // Global version last seen when the cache was updated.
    uint32_t cached_version;
    // 0 = not initialized, 1 = initialized.
    int initialized;
} SmallPolicySnapshotTLSCache;
|
||||
|
||||
// ENV gate for the policy-snapshot TLS cache. Default OFF: Phase 37 was a
// NO-GO (the TLS cache adds overhead and shows no benefit).
// Set HAKMEM_POLICY_SNAPSHOT_TLS=1 to enable (research only).
//
// Returns 1 when the TLS cache is enabled, 0 otherwise.
static inline int policy_snapshot_tls_enabled(void) {
#if HAKMEM_BENCH_MINIMAL
    // BENCH_MINIMAL: always disabled; the simpler Phase 36 path (version check
    // skipped entirely) is used instead.
    return 0;
#else
    static int s_state = -1;  // -1 = not resolved yet, 0 = off, 1 = on

    if (__builtin_expect(s_state == -1, 0)) {
        // Only a value whose first character is '1' enables the cache; unset
        // or anything else keeps the default OFF.
        const char* value = getenv("HAKMEM_POLICY_SNAPSHOT_TLS");
        s_state = (value != NULL && value[0] == '1') ? 1 : 0;
    }
    return s_state;
#endif
}
|
||||
|
||||
// Get TLS cache (thread-local singleton)
|
||||
SmallPolicySnapshotTLSCache* small_policy_snapshot_tls_get(void);
|
||||
|
||||
// Check if TLS cache is valid (fast path: just compare version)
|
||||
// Returns: 1 if cache is valid and can return cached_ptr, 0 if refresh needed
|
||||
static inline int small_policy_snapshot_tls_check(
|
||||
SmallPolicySnapshotTLSCache* cache,
|
||||
uint32_t global_version
|
||||
) {
|
||||
// Fast path: initialized and version matches
|
||||
if (__builtin_expect(cache->initialized && cache->cached_version == global_version, 1)) {
|
||||
return 1; // Cache hit
|
||||
}
|
||||
return 0; // Cache miss - needs refresh
|
||||
}
|
||||
|
||||
// Update TLS cache after refresh
|
||||
static inline void small_policy_snapshot_tls_update(
|
||||
SmallPolicySnapshotTLSCache* cache,
|
||||
const struct SmallPolicyV7* ptr,
|
||||
uint32_t version
|
||||
) {
|
||||
cache->cached_ptr = ptr;
|
||||
cache->cached_version = version;
|
||||
cache->initialized = 1;
|
||||
}
|
||||
|
||||
#endif // SMALL_POLICY_SNAPSHOT_TLS_BOX_H
|
||||
@ -92,6 +92,8 @@ static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx)
|
||||
return 0;
|
||||
}
|
||||
if (ctx->class_idx >= 0 && (uint8_t)ctx->class_idx != meta_cls) {
|
||||
// Phase 34B: Compile-out alloc gate class mismatch counter (default OFF)
|
||||
#if HAKMEM_ALLOC_GATE_CLS_MIS_COMPILED
|
||||
static _Atomic uint32_t g_alloc_gate_cls_mis = 0;
|
||||
uint32_t n = atomic_fetch_add_explicit(&g_alloc_gate_cls_mis, 1, memory_order_relaxed);
|
||||
if (n < 8) {
|
||||
@ -105,6 +107,9 @@ static inline int tiny_alloc_gate_validate(TinyAllocGateContext* ctx)
|
||||
info.slab_idx);
|
||||
fflush(stderr);
|
||||
}
|
||||
#else
|
||||
(void)0; // No-op when compiled out
|
||||
#endif
|
||||
// A class mismatch by itself does not trigger Fail-Fast; it is only logged (insertion point for a future Guard).
|
||||
}
|
||||
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
// tiny_front_v3_env_box.h - Tiny Front v3 ENV gate & snapshot (guard/UC/header)
|
||||
#pragma once
|
||||
|
||||
#include "../hakmem_build_flags.h" // Phase 35-A: HAKMEM_BENCH_MINIMAL
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
@ -28,6 +29,12 @@ extern TinyFrontV3Snapshot g_tiny_front_v3_snapshot;
|
||||
extern int g_tiny_front_v3_snapshot_ready;
|
||||
|
||||
// ENV gate: default ON (set HAKMEM_TINY_FRONT_V3_ENABLED=0 to disable)
|
||||
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default ON)
|
||||
#if HAKMEM_BENCH_MINIMAL
|
||||
// BENCH_MINIMAL variant: the gate is a fixed constant (the default ON
// behaviour), so this variant performs no lookup of its own.
static inline bool tiny_front_v3_enabled(void) {
    const bool fixed_on = true;
    return fixed_on;
}
|
||||
#else
|
||||
static inline bool tiny_front_v3_enabled(void) {
|
||||
static int g_enable = -1;
|
||||
if (__builtin_expect(g_enable == -1, 0)) {
|
||||
@ -40,6 +47,7 @@ static inline bool tiny_front_v3_enabled(void) {
|
||||
}
|
||||
return g_enable != 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Optional: size→class LUT gate (default ON, set HAKMEM_TINY_FRONT_V3_LUT_ENABLED=0 to disable)
|
||||
static inline bool tiny_front_v3_lut_enabled(void) {
|
||||
|
||||
@ -18,6 +18,12 @@
|
||||
// Forward declare the learner enabled check (to avoid header conflicts)
|
||||
extern bool small_learner_v2_enabled(void);
|
||||
|
||||
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default OFF)
|
||||
#if HAKMEM_BENCH_MINIMAL
|
||||
// BENCH_MINIMAL variant: the gate is a fixed constant (the default OFF
// behaviour), so this variant always reports 0.
static inline int tiny_metadata_cache_enabled(void) {
    const int fixed_off = 0;
    return fixed_off;
}
|
||||
#else
|
||||
static inline int tiny_metadata_cache_enabled(void) {
|
||||
static int g = -1;
|
||||
static int g_probe_left = 64; // tolerate early getenv() instability (bench_profile putenv)
|
||||
@ -54,5 +60,6 @@ static inline int tiny_metadata_cache_enabled(void) {
|
||||
g = 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // HAK_TINY_METADATA_CACHE_ENV_BOX_H
|
||||
|
||||
Reference in New Issue
Block a user