Files
hakmem/core/smallobject_policy_v7.c
Moe Charm (CI) b7085c47e1 Phase 35-39: FAST build optimization complete (+7.13% cumulative)
Phase 35-A: BENCH_MINIMAL gate function elimination (GO +4.39%)
- tiny_front_v3_enabled() → constant true
- tiny_metadata_cache_enabled() → constant 0
- learner_v7_enabled() → constant false
- small_learner_v2_enabled() → constant false

Phase 36: Policy snapshot init-once (GO +0.71%)
- small_policy_v7_snapshot() version check skip in BENCH_MINIMAL
- TLS cache for policy snapshot

Phase 37: Standard TLS cache (NO-GO -0.07%)
- TLS cache for Standard build attempted
- Runtime gate overhead negates benefit

Phase 38: FAST/OBSERVE/Standard workflow established
- make perf_fast, make perf_observe targets
- Scorecard and documentation updates

Phase 39: Hot path gate constantization (GO +1.98%)
- front_gate_unified_enabled() → constant 1
- alloc_dualhot_enabled() → constant 0
- g_bench_fast_front, g_v3_enabled blocks → compile-out
- free_dispatch_stats_enabled() → constant false

Results:
- FAST v3: 56.04M ops/s (47.4% of mimalloc)
- Standard: 53.50M ops/s (45.3% of mimalloc)
- M1 target (50%): 5.5% remaining

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-16 15:01:56 +09:00

347 lines
12 KiB
C

// smallobject_policy_v7.c - Policy Box implementation (Phase v7-7: Learner integration)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "hakmem_build_flags.h" // Phase 36: HAKMEM_BENCH_MINIMAL
#include "box/smallobject_policy_v7_box.h"
#include "box/smallobject_learner_v7_box.h" // For Learner API
#include "box/small_policy_snapshot_tls_box.h" // Phase 37: TLS cache
// Branch-prediction hints built on __builtin_expect. They are defined only when
// `likely` is not already a macro; the guard tests `likely` alone, so `unlikely`
// is assumed to be provided (or absent) together with it.
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// ============================================================================
// Learner Stats (Global, v7-7)
// ============================================================================
// Learner counters shared by all functions in this file. This is a plain static object
// (not thread-local); the record/evaluate functions below update it without atomics.
static SmallLearnerStatsV7 g_small_learner_stats_v7;
// Cached on/off decision for the learner, filled in lazily by learner_v7_enabled().
static int g_learner_v7_enabled = -1; // -1: uninit, 0: disabled, 1: enabled
// Phase 36: bench profiles never use the learner, so BENCH_MINIMAL builds hard-wire it off.
#if HAKMEM_BENCH_MINIMAL
static inline int learner_v7_enabled(void) {
    return 0; // constant OFF in bench mode
}
#else
// Reports whether the v7 learner is active (1) or not (0).
//
// The answer is resolved from the environment on the first call and cached in
// g_learner_v7_enabled; later calls only read the cached value.
//   - HAKMEM_SMALL_LEARNER_V7_ENABLED starting with '0'  -> off
//   - otherwise on exactly when HAKMEM_SMALL_HEAP_V7_ENABLED is set, non-empty
//     and does not start with '0' (Phase v10: learner follows v7 by default)
static inline int learner_v7_enabled(void) {
    if (likely(g_learner_v7_enabled >= 0)) {
        return g_learner_v7_enabled;
    }

    int resolved;
    const char* learner_env = getenv("HAKMEM_SMALL_LEARNER_V7_ENABLED");
    if (learner_env != NULL && learner_env[0] == '0') {
        // Explicit opt-out; the v7 switch is not consulted in this case.
        resolved = 0;
    } else {
        const char* heap_env = getenv("HAKMEM_SMALL_HEAP_V7_ENABLED");
        resolved = (heap_env != NULL && heap_env[0] != '\0' && heap_env[0] != '0') ? 1 : 0;
    }

    g_learner_v7_enabled = resolved;
    return g_learner_v7_enabled;
}
#endif
// ============================================================================
// TLS Policy Snapshot (v7-7: version-based invalidation)
// ============================================================================
// Process-wide policy version. It is advanced atomically by
// small_policy_v7_bump_version() and small_learner_v7_evaluate(); the value 0 is
// treated by small_policy_v7_snapshot() as "not initialized yet".
static uint32_t g_policy_v7_version = 0; // Global version, incremented by Learner
// Per-thread policy table that small_policy_v7_snapshot() fills and returns.
static __thread SmallPolicyV7 g_small_policy_v7;
// Global version this thread's table was last built against. Within this file it is
// only read and written by the non-cache fallback path of small_policy_v7_snapshot().
static __thread uint32_t g_small_policy_v7_version = 0; // TLS cached version
// Advances the global policy version by one using an atomic add, so that threads
// comparing their recorded version against g_policy_v7_version see a mismatch on
// their next small_policy_v7_snapshot() call and rebuild their table.
void small_policy_v7_bump_version(void) {
    (void)__sync_add_and_fetch(&g_policy_v7_version, 1);
}
// Returns the policy table the calling thread should route with, rebuilding it
// first when it has never been built or the global version has moved on.
//
// Three compile-time/run-time variants:
//   1. HAKMEM_BENCH_MINIMAL: the thread-local table is built once per thread from
//      the environment and never refreshed; the learner is not consulted.
//   2. policy_snapshot_tls_enabled(): validity is delegated to the TLS snapshot
//      cache helpers; on a miss the table is rebuilt and the cache updated.
//   3. Otherwise: the thread compares its own recorded version with the global one.
//
// In variants 2 and 3 a rebuild means small_policy_v7_init_from_env() followed by
// small_policy_v7_update_from_learner() when the learner is enabled and at least one
// retire has been recorded. The global version is read without synchronisation; only
// the 0 -> 1 initialisation uses a compare-and-swap.
//
// Returns: pointer to the thread-local g_small_policy_v7, or (cache hit in variant 2)
// cache->cached_ptr - presumably the same object; confirm against the cache helper.
const SmallPolicyV7* small_policy_v7_snapshot(void) {
#if HAKMEM_BENCH_MINIMAL
// Phase 36: BENCH_MINIMAL mode - skip version check, use init-once TLS cache
// Assumes: Learner disabled, policy doesn't change during benchmark
// Per-thread "already built" flag; each thread performs its own one-time init.
static __thread int s_initialized = 0;
if (unlikely(!s_initialized)) {
small_policy_v7_init_from_env(&g_small_policy_v7);
s_initialized = 1;
}
return &g_small_policy_v7;
#else
// Phase 37: TLS cache fast path (default ON)
if (policy_snapshot_tls_enabled()) {
SmallPolicySnapshotTLSCache* cache = small_policy_snapshot_tls_get();
// Single plain read of the shared counter; this value is what the cache is checked
// against and, unless it is 0, what the cache is stamped with after a rebuild.
uint32_t gver = g_policy_v7_version;
// Fast path: cache valid → return immediately (no global read beyond version)
if (small_policy_snapshot_tls_check(cache, gver)) {
return cache->cached_ptr;
}
// Slow path: refresh cache
small_policy_v7_init_from_env(&g_small_policy_v7);
// v7-7: Apply Learner-driven route updates
if (learner_v7_enabled() && g_small_learner_stats_v7.total_retires > 0) {
small_policy_v7_update_from_learner(&g_small_learner_stats_v7, &g_small_policy_v7);
}
// Initialize global version to 1 if uninitialized (0)
// The CAS result is ignored: if another thread already moved the counter, the cache
// is still stamped with 1 and will simply miss again on the next call.
if (gver == 0) {
__sync_val_compare_and_swap(&g_policy_v7_version, 0, 1);
gver = 1;
}
// Update TLS cache
small_policy_snapshot_tls_update(cache, &g_small_policy_v7, gver);
return &g_small_policy_v7;
}
// Fallback: original version-check path (HAKMEM_POLICY_SNAPSHOT_TLS=0)
// The "== 0" clause forces the very first build even though the thread's recorded
// version and the global version both start at 0 and therefore compare equal.
if (unlikely(g_small_policy_v7_version != g_policy_v7_version || g_policy_v7_version == 0)) {
small_policy_v7_init_from_env(&g_small_policy_v7);
// v7-7: Apply Learner-driven route updates
if (learner_v7_enabled() && g_small_learner_stats_v7.total_retires > 0) {
small_policy_v7_update_from_learner(&g_small_learner_stats_v7, &g_small_policy_v7);
}
// Initialize global version to 1 if uninitialized (0)
// This prevents infinite re-init loop where condition is always true
if (g_policy_v7_version == 0) {
__sync_val_compare_and_swap(&g_policy_v7_version, 0, 1);
}
// Record the version this table now corresponds to (re-read after the possible CAS).
g_small_policy_v7_version = g_policy_v7_version;
}
return &g_small_policy_v7;
#endif
}
// ============================================================================
// ENV Helpers
// ============================================================================
// Tells whether the environment variable `name` switches a feature on.
//
// Returns true when the variable is set, non-empty, and its first character is not
// '0'. Only the first byte is inspected, so "00" and "0x1" count as off and any
// other non-empty text (e.g. "1", "on") counts as on.
static inline bool env_enabled(const char* name) {
    const char* value = getenv(name);
    if (value == NULL) {
        return false;
    }
    switch (value[0]) {
    case '\0':
    case '0':
        return false;
    default:
        return true;
    }
}
// Reads a class bitmask from the environment variable `name`.
//
// name          environment variable to read
// default_mask  returned when the variable is unset, empty, or does not begin
//               with a number
//
// The text is converted by strtoul with base 0, so decimal, 0x-prefixed hex and
// 0-prefixed octal are all accepted; the result is truncated to 32 bits.
//
// A value strtoul cannot convert at all (for example "C5") yields default_mask.
// Previously such input silently produced mask 0, which left the feature enabled
// while routing no class to it.
static inline uint32_t env_class_mask(const char* name, uint32_t default_mask) {
    const char* text = getenv(name);
    if (text == NULL || *text == '\0') {
        return default_mask;
    }

    char* end = NULL;
    unsigned long parsed = strtoul(text, &end, 0);
    if (end == text) {
        // strtoul consumed nothing: there was no number to read.
        return default_mask;
    }
    return (uint32_t)parsed;
}
// ============================================================================
// MID v3.5 ENV Helpers (Phase v11a-3)
// ============================================================================
// Tells whether MID v3.5 routing is switched on via HAKMEM_MID_V35_ENABLED.
//
// Returns true when the variable is set, non-empty, and does not start with '0';
// only the first character is examined.
static inline bool mid_v35_enabled(void) {
    const char* flag = getenv("HAKMEM_MID_V35_ENABLED");
    const bool has_text = (flag != NULL) && (flag[0] != '\0');
    return has_text && flag[0] != '0';
}
// Reads the MID v3.5 class bitmask from HAKMEM_MID_V35_CLASSES.
//
// Returns 0x60 (C5 = 0x20 plus C6 = 0x40) when the variable is unset, empty, or
// does not begin with a number. Otherwise returns the value converted by strtoul
// with base 0 (decimal, 0x hex, or 0 octal), truncated to 32 bits.
//
// A value strtoul cannot convert at all now falls back to the default. Previously
// it silently produced mask 0, leaving MID v3.5 enabled with no class routed to it.
static inline uint32_t mid_v35_class_mask(void) {
    const uint32_t default_mask = 0x60; // Default: C5(0x20) + C6(0x40)

    const char* text = getenv("HAKMEM_MID_V35_CLASSES");
    if (text == NULL || *text == '\0') {
        return default_mask;
    }

    char* end = NULL;
    unsigned long parsed = strtoul(text, &end, 0);
    if (end == text) {
        // strtoul consumed nothing: there was no number to read.
        return default_mask;
    }
    return (uint32_t)parsed;
}
// ============================================================================
// Policy Initialization from ENV
// ============================================================================
void small_policy_v7_init_from_env(SmallPolicyV7* policy) {
if (!policy) return;
// Default: all classes go to LEGACY
for (int i = 0; i < 8; i++) {
policy->route_kind[i] = SMALL_ROUTE_LEGACY;
}
// Priority 4: MID_v3 (257-768B, C5-C6 range)
// ENV: HAKMEM_MID_V3_ENABLED, HAKMEM_MID_V3_CLASSES
if (env_enabled("HAKMEM_MID_V3_ENABLED")) {
uint32_t mid_mask = env_class_mask("HAKMEM_MID_V3_CLASSES", 0x60); // C5-C6 default
for (int i = 0; i < 8; i++) {
if (mid_mask & (1u << i)) {
policy->route_kind[i] = SMALL_ROUTE_MID_V3;
}
}
}
// Priority 3: MID_v3.5 (Phase v11a-3: higher priority than MID_v3)
// ENV: HAKMEM_MID_V35_ENABLED, HAKMEM_MID_V35_CLASSES
if (mid_v35_enabled()) {
uint32_t v35_mask = mid_v35_class_mask();
for (int i = 0; i < 8; i++) {
if (v35_mask & (1u << i)) {
// Only override if not ULTRA
if (policy->route_kind[i] != SMALL_ROUTE_ULTRA) {
policy->route_kind[i] = SMALL_ROUTE_MID_V35;
}
}
}
}
// Priority 2: SmallObject v7 (research box, C6-only for now)
// ENV: HAKMEM_SMALL_HEAP_V7_ENABLED, HAKMEM_SMALL_HEAP_V7_CLASSES
if (env_enabled("HAKMEM_SMALL_HEAP_V7_ENABLED")) {
uint32_t v7_mask = env_class_mask("HAKMEM_SMALL_HEAP_V7_CLASSES", 0x40); // C6 default
for (int i = 0; i < 8; i++) {
if (v7_mask & (1u << i)) {
policy->route_kind[i] = SMALL_ROUTE_V7;
}
}
}
// Priority 1: ULTRA (highest priority, C4-C7)
// Phase v11a-5: All ULTRA ENVs consolidated here (removed from hot path)
// C7 ULTRA (default ON via HAKMEM_TINY_C7_ULTRA_ENABLED)
if (env_enabled("HAKMEM_TINY_C7_ULTRA_ENABLED")) {
policy->route_kind[7] = SMALL_ROUTE_ULTRA;
}
// C6 ULTRA (via HAKMEM_TINY_C6_ULTRA_FREE_ENABLED - TLS freelist pop)
if (env_enabled("HAKMEM_TINY_C6_ULTRA_FREE_ENABLED")) {
policy->route_kind[6] = SMALL_ROUTE_ULTRA;
}
// C5 ULTRA (via HAKMEM_TINY_C5_ULTRA_FREE_ENABLED - TLS freelist pop)
if (env_enabled("HAKMEM_TINY_C5_ULTRA_FREE_ENABLED")) {
policy->route_kind[5] = SMALL_ROUTE_ULTRA;
}
// C4 ULTRA (via HAKMEM_TINY_C4_ULTRA_FREE_ENABLED - TLS freelist pop)
if (env_enabled("HAKMEM_TINY_C4_ULTRA_FREE_ENABLED")) {
policy->route_kind[4] = SMALL_ROUTE_ULTRA;
}
// Debug output (if needed)
static int g_debug_once = 0;
if (!g_debug_once) {
g_debug_once = 1;
fprintf(stderr, "[POLICY_V7_INIT] Route assignments:\n");
for (int i = 0; i < 8; i++) {
fprintf(stderr, " C%d: %s\n", i, small_route_kind_name(policy->route_kind[i]));
}
}
}
// ============================================================================
// Utility
// ============================================================================
// Maps a route kind to a short label for log output.
//
// Returns a pointer to a string literal: "ULTRA", "V7", "MID_V3", "MID_V35" or
// "LEGACY" for the corresponding SMALL_ROUTE_* value, and "UNKNOWN" for anything else.
const char* small_route_kind_name(SmallRouteKind kind) {
    const char* label = "UNKNOWN";
    switch (kind) {
    case SMALL_ROUTE_LEGACY:
        label = "LEGACY";
        break;
    case SMALL_ROUTE_MID_V3:
        label = "MID_V3";
        break;
    case SMALL_ROUTE_MID_V35:
        label = "MID_V35";
        break;
    case SMALL_ROUTE_V7:
        label = "V7";
        break;
    case SMALL_ROUTE_ULTRA:
        label = "ULTRA";
        break;
    default:
        break;
    }
    return label;
}
// ============================================================================
// Learner Implementation (Phase v7-7)
// ============================================================================
// Count of refills recorded so far; used only to pace the periodic evaluation.
static uint64_t g_small_learner_total_refills = 0;

// Records one page refill for `class_idx` in the learner statistics.
//
// class_idx  size class; values of 8 or more are ignored
// capacity   added to the class's v7_allocs as a proxy for traffic volume
//
// Does nothing while the learner is disabled. Every SMALL_LEARNER_EVAL_INTERVAL-th
// recorded refill triggers small_learner_v7_evaluate(). Counters are updated without
// atomics.
void small_learner_v7_record_refill(uint32_t class_idx, uint64_t capacity) {
    if (!learner_v7_enabled() || class_idx >= 8) {
        return;
    }

    // A refill means the class's current page was exhausted and a new one was needed.
    SmallLearnerClassStatsV7* slot = g_small_learner_stats_v7.per_class + class_idx;
    slot->v7_allocs += capacity;
    slot->sample_count += 1;

    g_small_learner_total_refills += 1;
    if ((g_small_learner_total_refills % SMALL_LEARNER_EVAL_INTERVAL) != 0) {
        return;
    }
    small_learner_v7_evaluate();
}
// Records one page retire for `class_idx` in the learner statistics.
//
// class_idx  size class; values of 8 or more are ignored
// capacity   accepted for API compatibility and currently unused
//
// Does nothing while the learner is disabled. Increments the class's v7_retires and
// the global total_retires. The updates are plain (non-atomic) increments, kept
// simple on purpose even though atomics would be preferable with multiple threads.
void small_learner_v7_record_retire(uint32_t class_idx, uint64_t capacity) {
    (void)capacity;

    if (!learner_v7_enabled() || class_idx >= 8) {
        return;
    }

    SmallLearnerClassStatsV7* slot = g_small_learner_stats_v7.per_class + class_idx;
    slot->v7_retires += 1;
    g_small_learner_stats_v7.total_retires += 1;
}
// Applies the learner's C5 routing decision to `policy_out`.
//
// stats       per-class counters to read; NULL makes the call a no-op
// policy_out  policy table adjusted in place; NULL makes the call a no-op
//
// Only route_kind[5] is ever modified. C5's share of the v7_allocs summed over all
// eight classes is computed as an integer percentage: at or above
// SMALL_LEARNER_C5_THRESHOLD_PCT C5 is routed to V7, below it to MID_V3. Nothing
// changes while no allocations have been recorded. The first route change in the
// process is logged to stderr.
//
// A C5 slot on ULTRA is left untouched: ULTRA is the highest-priority route set up
// by small_policy_v7_init_from_env(), and this function runs after it, so without
// the guard the learner would silently demote a class the user pinned to ULTRA.
//
// NOTE(review): a C5 slot on LEGACY or MID_V35 is still rewritten to V7/MID_V3,
// whether or not those routes were enabled through the environment - confirm intent.
void small_policy_v7_update_from_learner(
    const SmallLearnerStatsV7* stats,
    SmallPolicyV7* policy_out
) {
    if (!stats || !policy_out) return;

    const SmallRouteKind old_c5_route = policy_out->route_kind[5];
    if (old_c5_route == SMALL_ROUTE_ULTRA) {
        return; // never override the highest-priority route
    }

    // Total traffic across all classes; with no data there is nothing to decide.
    uint64_t total_allocs = 0;
    for (int i = 0; i < 8; i++) {
        total_allocs += stats->per_class[i].v7_allocs;
    }
    if (total_allocs == 0) return;

    const uint64_t c5_allocs = stats->per_class[5].v7_allocs;
    const uint64_t c5_ratio_pct = (c5_allocs * 100) / total_allocs;

    // C5-heavy workload -> V7; mixed workload -> MID_v3.
    const SmallRouteKind new_c5_route =
        (c5_ratio_pct >= SMALL_LEARNER_C5_THRESHOLD_PCT) ? SMALL_ROUTE_V7
                                                         : SMALL_ROUTE_MID_V3;
    if (old_c5_route == new_c5_route) return;

    // Log only the first switch (to avoid spam).
    static int g_learner_v7_switch_logged = 0;
    if (!g_learner_v7_switch_logged) {
        g_learner_v7_switch_logged = 1;
        fprintf(stderr, "[LEARNER_V7] C5 route switch: %s → %s (C5 ratio=%lu%%, threshold=%d%%)\n",
                small_route_kind_name(old_c5_route),
                small_route_kind_name(new_c5_route),
                (unsigned long)c5_ratio_pct,
                SMALL_LEARNER_C5_THRESHOLD_PCT);
    }
    policy_out->route_kind[5] = new_c5_route;
}
// Gives read-only access to the learner statistics.
//
// Despite the name, no copy is taken: the returned pointer refers to the live global
// g_small_learner_stats_v7, so its contents may change after the call returns.
const SmallLearnerStatsV7* small_learner_v7_stats_snapshot(void) {
    const SmallLearnerStatsV7* live_stats = &g_small_learner_stats_v7;
    return live_stats;
}
// Runs one learner evaluation step; a no-op while the learner is disabled.
//
// The step atomically advances the global policy version, so each thread's next
// small_policy_v7_snapshot() call sees a version mismatch and rebuilds its table
// (re-applying the learner's route updates). It then increments the eval_count
// statistic with a plain, non-atomic add.
void small_learner_v7_evaluate(void) {
    if (learner_v7_enabled()) {
        (void)__sync_add_and_fetch(&g_policy_v7_version, 1);
        g_small_learner_stats_v7.eval_count += 1;
    }
}