Phase 68: PGO training set diversification (seed/WS expansion)

Changes:
- scripts/box/pgo_fast_profile_config.sh: Expanded WS patterns (3→5) and seeds (1→3)
  for reduced overfitting and better production workload representativeness
- PERFORMANCE_TARGETS_SCORECARD.md: Phase 68 baseline promoted (61.614M = 50.93%)
- CURRENT_TASK.md: Phase 68 marked complete, Phase 67a (layout tax forensics) set Active

Results:
- 10-run verification: +1.19% vs Phase 66 baseline (GO, >+1.0% threshold)
- M1 milestone: 50.93% of mimalloc (target 50%, exceeded by +0.93pp)
- Stability: 10-run mean/median with <2.1% CV

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-17 21:08:17 +09:00
parent 10fb0497e2
commit 84f5034e45
44 changed files with 1520 additions and 583 deletions

View File

@ -25,7 +25,8 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
core/box/../hakmem_tiny_integrity.h core/box/../hakmem_tiny.h \
core/box/../tiny_region_id.h core/box/../tiny_box_geometry.h \
core/box/../ptr_track.h core/box/../tiny_debug_api.h \
core/box/../box/tiny_header_hotfull_env_box.h core/box/carve_push_box.h \
core/box/../box/tiny_header_hotfull_env_box.h \
core/box/../box/../hakmem_build_flags.h core/box/carve_push_box.h \
core/box/capacity_box.h core/box/tls_sll_box.h \
core/box/../hakmem_internal.h core/box/../hakmem.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
@ -87,6 +88,7 @@ core/box/../tiny_box_geometry.h:
core/box/../ptr_track.h:
core/box/../tiny_debug_api.h:
core/box/../box/tiny_header_hotfull_env_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/carve_push_box.h:
core/box/capacity_box.h:
core/box/tls_sll_box.h:

View File

@ -19,6 +19,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
// ENV control: cached flag for fastlane_direct_enabled()
// -1: uninitialized, 0: disabled, 1: enabled
@ -30,6 +31,9 @@ extern _Atomic int g_fastlane_direct_enabled;
// Returns: 1 if enabled, 0 if disabled
// Hot path: Single atomic load (after first call)
static inline int fastlane_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
int val = atomic_load_explicit(&g_fastlane_direct_enabled, memory_order_relaxed);
if (__builtin_expect(val == -1, 0)) {
// Cold path: Initialize from ENV

View File

@ -30,6 +30,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
// Forward declaration for cross-box includes
static inline int free_tiny_direct_enabled(void);
@ -41,6 +42,9 @@ static inline int free_tiny_direct_enabled(void);
// Lazy init: Check ENV variable on first call, cache result
// Thread-safe: Read-only after init (atomic store, relaxed load)
static inline int free_tiny_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static _Atomic int cached = -1; // -1 = uninitialized
int val = atomic_load_explicit(&cached, memory_order_relaxed);

View File

@ -14,6 +14,9 @@
#include "../hakmem_build_flags.h"
static inline int hak_free_tiny_fast_hotcold_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static int g = -1;
static int g_probe_left = 64; // tolerate early getenv() instability (bench_profile putenv)

View File

@ -4,6 +4,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "../hakmem_build_flags.h"
// Phase 9: FREE-TINY-FAST MONO DUALHOT ENV gate
//
@ -16,6 +17,9 @@
// - A/B: Same binary, flip ENV for immediate rollback
static inline int free_tiny_fast_mono_dualhot_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static int g_enabled = -1; // -1: unknown, 0: off, 1: on
static int g_probe_left = 64; // Probe window (tolerate early putenv)

View File

@ -22,6 +22,9 @@
// ============================================================================
static inline int free_tiny_fast_mono_legacy_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static int g = -1;
static int g_probe_left = 64; // tolerate early getenv() instability (bench_profile putenv)

View File

@ -44,6 +44,9 @@ extern __thread struct free_wrapper_env_snapshot g_free_wrapper_env;
// ENV gate: Enable/disable snapshot optimization (default: OFF, research box)
static inline int free_wrapper_env_snapshot_enabled(void)
{
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static __thread int s_enabled = -1;
if (__builtin_expect(s_enabled == -1, 0)) {
const char* env = getenv("HAKMEM_FREE_WRAPPER_ENV_SNAPSHOT");

View File

@ -3,6 +3,7 @@
// ============================================================================
#include "front_fastlane_alloc_legacy_direct_env_box.h"
#include "../hakmem_build_flags.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@ -19,6 +20,11 @@ _Atomic int g_front_fastlane_alloc_legacy_direct_enabled = -1;
// ============================================================================
int front_fastlane_alloc_legacy_direct_env_init(void) {
#if HAKMEM_FAST_PROFILE_FIXED
// Fixed OFF in FAST profile fixed builds.
atomic_store_explicit(&g_front_fastlane_alloc_legacy_direct_enabled, 0, memory_order_relaxed);
return 0;
#else
const char* env = getenv("HAKMEM_FRONT_FASTLANE_ALLOC_LEGACY_DIRECT");
int enabled = 0; // default: OFF (opt-in)
@ -37,6 +43,7 @@ int front_fastlane_alloc_legacy_direct_env_init(void) {
}
return enabled;
#endif
}
// ============================================================================
@ -45,11 +52,15 @@ int front_fastlane_alloc_legacy_direct_env_init(void) {
// LTO fallback: Non-inline version for cases where LTO can't inline
int front_fastlane_alloc_legacy_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 0;
#else
int val = atomic_load_explicit(&g_front_fastlane_alloc_legacy_direct_enabled, memory_order_relaxed);
if (__builtin_expect(val == -1, 0)) {
val = front_fastlane_alloc_legacy_direct_env_init();
}
return val;
#endif
}
// ============================================================================
@ -57,7 +68,12 @@ int front_fastlane_alloc_legacy_direct_enabled(void) {
// ============================================================================
void front_fastlane_alloc_legacy_direct_env_refresh_from_env(void) {
#if HAKMEM_FAST_PROFILE_FIXED
// Keep fixed OFF.
atomic_store_explicit(&g_front_fastlane_alloc_legacy_direct_enabled, 0, memory_order_relaxed);
#else
// Reset to uninitialized state (-1)
// Next call to front_fastlane_alloc_legacy_direct_enabled() will re-read ENV
atomic_store_explicit(&g_front_fastlane_alloc_legacy_direct_enabled, -1, memory_order_relaxed);
#endif
}

View File

@ -35,6 +35,7 @@
#define FRONT_FASTLANE_ALLOC_LEGACY_DIRECT_ENV_BOX_H
#include <stdatomic.h>
#include "../hakmem_build_flags.h"
// ============================================================================
// Global State (L0)

View File

@ -35,6 +35,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include <stdint.h>
#include "../hakmem_build_flags.h"
// Forward declaration for cross-box includes
static inline int front_fastlane_enabled(void);
@ -48,6 +49,9 @@ static inline int front_fastlane_free_dedup_enabled(void);
// Lazy init: Check ENV variable on first call, cache result
// Thread-safe: Read-only after init (atomic store, relaxed load)
static inline int front_fastlane_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static _Atomic int cached = -1; // -1 = uninitialized
int val = atomic_load_explicit(&cached, memory_order_relaxed);
@ -71,6 +75,9 @@ static inline int front_fastlane_enabled(void) {
// Get class mask for gradual rollout (default: 0xFF = all classes)
static inline uint8_t front_fastlane_class_mask(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return (uint8_t)0xFF;
#endif
static _Atomic int cached = -1; // -1 = uninitialized
int val = atomic_load_explicit(&cached, memory_order_relaxed);
@ -100,6 +107,9 @@ static inline uint8_t front_fastlane_class_mask(void) {
// When enabled, front_fastlane_try_free() directly calls free_tiny_fast()
// instead of doing its own header validation.
static inline int front_fastlane_free_dedup_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static _Atomic int cached = -1; // -1 = uninitialized
int val = atomic_load_explicit(&cached, memory_order_relaxed);

View File

@ -11,6 +11,7 @@
#include "tiny_c7_hotbox.h" // tiny_c7_alloc_fast wrapper
#include "mid_hotbox_v3_box.h" // Phase MID-V3: Mid/Pool HotBox v3 types
#include "mid_hotbox_v3_env_box.h" // Phase MID-V3: ENV gate for v3
#include "../hakmem_build_flags.h" // Phase 64: For backend pruning
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
@ -79,6 +80,7 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
// Design: TLS lane cache with page-based allocation, RegionIdBox integration
// NOTE: Must come BEFORE Tiny to intercept specific size classes
// PERF: C6 shows +11% improvement, Mixed (257-768B) shows +19.8% improvement
#if !HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
if (__builtin_expect(mid_v3_enabled() && size >= 257 && size <= 768, 0)) {
static _Atomic int entry_log_count = 0;
if (mid_v3_debug_enabled() && atomic_fetch_add(&entry_log_count, 1) < 3) {
@ -115,6 +117,7 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
}
}
}
#endif
// Phase 16: Dynamic Tiny max size (ENV: HAKMEM_TINY_MAX_CLASS)
// Default: 1023B (C0-C7), reduced to 255B (C0-C5) when Small-Mid enabled

View File

@ -239,6 +239,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
g_free_dispatch_ssot = (env && *env == '1') ? 1 : 0;
}
#if !HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
if (g_free_dispatch_ssot && __builtin_expect(mid_v3_enabled(), 0)) {
// SSOT=1: Single lookup, then dispatch
extern RegionLookupV6 region_id_lookup_cached_v6(void* ptr);
@ -279,6 +280,7 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
goto done;
}
}
#endif
{
extern int hak_pool_mid_lookup(void* ptr, size_t* out_size);

View File

@ -60,7 +60,8 @@ extern int g_hakmem_env_snapshot_ctor_mode;
// ENV gate: default OFF (research box, set =1 to enable)
// E3-4: Dual-mode - constructor init (fast) or legacy lazy init (fallback)
// Phase 18 v2: BENCH_MINIMAL conditional (constant return when HAKMEM_BENCH_MINIMAL=1)
#if HAKMEM_BENCH_MINIMAL
// Phase 63: FAST_PROFILE_FIXED conditional (constant return for FAST profile builds)
#if HAKMEM_BENCH_MINIMAL || HAKMEM_FAST_PROFILE_FIXED
// In bench mode, snapshot is always enabled (one-time cost, compile-away benefit)
static inline bool hakmem_env_snapshot_enabled(void) {
return 1;

View File

@ -11,9 +11,14 @@
#pragma once
#include "../hakmem_config.h"
#include "../hakmem_build_flags.h"
#include <stdlib.h>
static inline int hak_learner_env_should_run(void) {
#if HAKMEM_BENCH_MINIMAL || HAKMEM_FAST_PROFILE_FIXED || HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
// Phase 63/64: Disable learning layer in FAST profile-fixed/pruned builds
return 0;
#endif
static int g_inited = 0;
static int g_effective = 0;
if (__builtin_expect(!g_inited, 0)) {
@ -30,4 +35,3 @@ static inline int hak_learner_env_should_run(void) {
}
return g_effective;
}

View File

@ -30,6 +30,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
// Forward declaration for cross-box includes
static inline int malloc_tiny_direct_enabled(void);
@ -41,6 +42,9 @@ static inline int malloc_tiny_direct_enabled(void);
// Lazy init: Check ENV variable on first call, cache result
// Thread-safe: Read-only after init (atomic store, relaxed load)
static inline int malloc_tiny_direct_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 0;
#endif
static _Atomic int cached = -1; // -1 = uninitialized
int val = atomic_load_explicit(&cached, memory_order_relaxed);

View File

@ -44,6 +44,9 @@ extern __thread struct malloc_wrapper_env_snapshot g_malloc_wrapper_env;
// ENV gate: Enable/disable snapshot optimization (default: OFF, research box)
static inline int malloc_wrapper_env_snapshot_enabled(void)
{
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
static __thread int s_enabled = -1;
if (__builtin_expect(s_enabled == -1, 0)) {
const char* env = getenv("HAKMEM_MALLOC_WRAPPER_ENV_SNAPSHOT");

View File

@ -6,12 +6,18 @@
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_build_flags.h"
// ============================================================================
// HAKMEM_MID_V3_ENABLED: Master switch for MID v3
// ============================================================================
static inline int mid_v3_enabled(void) {
#if HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
// Phase 64: Backend pruning - disable MID_V3 in Mixed workload
// Compile-time constant for DCE (unreachable code elimination)
return 0;
#else
static int g_enable = -1;
if (__builtin_expect(g_enable == -1, 0)) {
const char* e = getenv("HAKMEM_MID_V3_ENABLED");
@ -23,6 +29,7 @@ static inline int mid_v3_enabled(void) {
}
}
return g_enable;
#endif
}
// ============================================================================

View File

@ -8,6 +8,7 @@
#define POOL_CONFIG_BOX_H
#include "tiny_heap_env_box.h" // TinyHeap profile (C7_SAFE modes)
#include "../hakmem_build_flags.h"
#include <stdlib.h>
#include <string.h>
@ -17,12 +18,18 @@
// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
#if HAKMEM_FAST_PROFILE_PRUNE_BACKENDS
// Phase 64: Backend pruning - disable POOL_V2 in Mixed workload
// Compile-time constant for DCE (unreachable code elimination)
return 0;
#else
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
#endif
}
// Fine-grained switches (only used when v2 is enabled).

View File

@ -35,6 +35,7 @@ core/box/superslab_expansion_box.o: core/box/superslab_expansion_box.c \
core/box/../tiny_region_id.h core/box/../tiny_box_geometry.h \
core/box/../ptr_track.h core/box/../tiny_debug_api.h \
core/box/../box/tiny_header_hotfull_env_box.h \
core/box/../box/../hakmem_build_flags.h \
core/box/../hakmem_tiny_integrity.h core/box/../box/tiny_next_ptr_box.h \
core/hakmem_tiny_config.h core/tiny_nextptr.h core/hakmem_build_flags.h \
core/tiny_region_id.h core/superslab/superslab_inline.h \
@ -93,6 +94,7 @@ core/box/../tiny_box_geometry.h:
core/box/../ptr_track.h:
core/box/../tiny_debug_api.h:
core/box/../box/tiny_header_hotfull_env_box.h:
core/box/../box/../hakmem_build_flags.h:
core/box/../hakmem_tiny_integrity.h:
core/box/../box/tiny_next_ptr_box.h:
core/hakmem_tiny_config.h:

View File

@ -15,6 +15,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include <stdio.h>
#include "../hakmem_build_flags.h"
// Global state for free static route ENV gate (defined in .c file)
// -1 = uninitialized, 0 = disabled, 1 = enabled
@ -28,6 +29,9 @@ void tiny_free_static_route_refresh_from_env(void);
// Returns: 1 if enabled, 0 if disabled (default)
// ENV: HAKMEM_FREE_STATIC_ROUTE=0/1
static inline int tiny_free_static_route_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
int current = atomic_load_explicit(&g_free_static_route_enabled, memory_order_relaxed);
if (__builtin_expect(current >= 0, 1)) {
return current;

View File

@ -30,7 +30,8 @@ extern int g_tiny_front_v3_snapshot_ready;
// ENV gate: default ON (set HAKMEM_TINY_FRONT_V3_ENABLED=0 to disable)
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default ON)
#if HAKMEM_BENCH_MINIMAL
// Phase 63: FAST_PROFILE_FIXED - compile-time constant (FAST profile defaults)
#if HAKMEM_BENCH_MINIMAL || HAKMEM_FAST_PROFILE_FIXED
static inline bool tiny_front_v3_enabled(void) {
return true; // Fixed ON in bench mode (default behavior)
}
@ -51,6 +52,9 @@ static inline bool tiny_front_v3_enabled(void) {
// Optional: size→class LUT gate (default ON, set HAKMEM_TINY_FRONT_V3_LUT_ENABLED=0 to disable)
static inline bool tiny_front_v3_lut_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return true;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_LUT_ENABLED");
@ -65,6 +69,9 @@ static inline bool tiny_front_v3_lut_enabled(void) {
// Optional: route fast path (Tiny LUT→1 switch). Default OFF for easy rollback.
static inline bool tiny_front_v3_route_fast_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return false;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_ROUTE_FAST_ENABLED");
@ -75,6 +82,9 @@ static inline bool tiny_front_v3_route_fast_enabled(void) {
// C7 v3 free 専用 ptr fast classify gate (default OFF)
static inline bool tiny_ptr_fast_classify_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return true;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_PTR_FAST_CLASSIFY_ENABLED");
@ -89,6 +99,9 @@ static inline bool tiny_ptr_fast_classify_enabled(void) {
// C7/C6 v4 free 用 fast classify gate (default OFF)
static inline bool tiny_ptr_fast_classify_v4_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return false;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_PTR_FAST_CLASSIFY_V4_ENABLED");
@ -99,6 +112,9 @@ static inline bool tiny_ptr_fast_classify_v4_enabled(void) {
// Optional stats gate
static inline bool tiny_front_v3_stats_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return false;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_STATS");
@ -109,6 +125,9 @@ static inline bool tiny_front_v3_stats_enabled(void) {
// Header v3 experimental gate (default OFF)
static inline bool tiny_header_v3_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return false;
#endif
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_HEADER_V3_ENABLED");

View File

@ -20,6 +20,7 @@
#include <stdatomic.h>
#include <stdlib.h>
#include "../hakmem_build_flags.h"
// ENV control: cached flag for tiny_header_hotfull_enabled()
// -1: uninitialized, 0: disabled (opt-out), 1: enabled (default)
@ -31,6 +32,9 @@ extern _Atomic int g_tiny_header_hotfull_enabled;
// Returns: 1 if enabled (default), 0 if disabled (opt-out with HAKMEM_TINY_HEADER_HOTFULL=0)
// Hot path: Single atomic load (after first call)
static inline int tiny_header_hotfull_enabled(void) {
#if HAKMEM_FAST_PROFILE_FIXED
return 1;
#endif
int val = atomic_load_explicit(&g_tiny_header_hotfull_enabled, memory_order_relaxed);
if (__builtin_expect(val == -1, 0)) {
// Cold path: Initialize from ENV

View File

@ -19,7 +19,8 @@
extern bool small_learner_v2_enabled(void);
// Phase 35-A: BENCH_MINIMAL mode - compile-time constant (default OFF)
#if HAKMEM_BENCH_MINIMAL
// Phase 63: FAST_PROFILE_FIXED - compile-time constant (FAST profile defaults)
#if HAKMEM_BENCH_MINIMAL || HAKMEM_FAST_PROFILE_FIXED
static inline int tiny_metadata_cache_enabled(void) {
return 0; // Fixed OFF in bench mode (default behavior)
}