Files
hakmem/core/hakmem_evo.c
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

492 lines
16 KiB
C

// hakmem_evo.c - Learning Lifecycle State Machine Implementation
//
// License: MIT
// Date: 2025-10-21
#include "hakmem_evo.h"
#include "hakmem_p2.h"
#include "hakmem_sizeclass_dist.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <stdatomic.h>
#include <math.h> // for INFINITY
// Forward declaration (defined near the end of this file; used by hak_evo_tick
// to snapshot the current window's estimator into the history ring).
static void hak_p2_copy(hak_p2_t* dst, const hak_p2_t* src);
// ============================================================================
// Global State
// ============================================================================
// Runtime configuration; filled in by init_config() from environment variables.
static hak_evo_config_t g_config;
// Current lifecycle mode. This is the only piece of state accessed through
// atomic operations; the remaining globals below are plain (non-atomic).
static _Atomic int g_mode = EVO_MODE_LEARN;
// Set to 1 by hak_evo_init() and back to 0 by hak_evo_shutdown().
static int g_initialized = 0;
// P² estimator for p99 latency of the window currently being collected
static hak_p2_t g_p2_current;
// Snapshots of g_p2_current taken at each window closure (oldest first).
// Once all 10 slots are used, hak_evo_tick() shifts entries down by one.
static hak_p2_t g_p2_history[10]; // Last 10 windows for convergence check
// Number of valid entries in g_p2_history (0..10)
static int g_p2_history_count = 0;
// Size distribution of the current window, and the copy captured when the
// state machine last entered FROZEN
static hak_sizeclass_dist_t g_dist_current;
static hak_sizeclass_dist_t g_dist_baseline;
// Baseline p99 (set when freezing); INFINITY until the first freeze
static double g_baseline_p99 = INFINITY;
// Window tracking: g_window_ops_count is bumped by hak_evo_record_latency()
// and both fields are reset when hak_evo_tick() closes a window
static uint64_t g_window_ops_count = 0;
static uint64_t g_window_start_ns = 0;
// State transition tracking: time of the last switch_mode() transition
// (input to cooldown_passed()) and the time hak_evo_init() ran
static uint64_t g_last_switch_ns = 0;
static uint64_t g_uptime_start_ns = 0;
// Degradation tracking (for re-learning trigger): number of consecutive
// evaluated FROZEN windows whose p99 exceeded the baseline by relearn_delta
static int g_consecutive_bad_windows = 0;
// Statistics (zeroed in hak_evo_init(), reported by hak_evo_print_stats())
static struct {
uint64_t total_samples_latency;
uint64_t total_samples_size;
uint64_t windows_completed;
uint64_t freeze_count;
uint64_t relearn_count;
uint64_t canary_success;
uint64_t canary_rollback;
} g_stats;
// CANARY mode state (Step 5)
static int g_confirmed_strategy = 0; // Confirmed best strategy
static int g_candidate_strategy = 1; // Candidate strategy (for trial)
// NOTE(review): g_canary_start_ns is never read or written in the visible
// part of this file — confirm whether a CANARY timeout is still planned.
static uint64_t g_canary_start_ns = 0; // CANARY mode start time
static _Atomic uint64_t g_canary_sample_count = 0; // Number of CANARY samples
// Fed by hak_evo_record_canary_result(); the visible state machine does not
// consult it (it compares g_p2_current against the baseline instead).
static hak_p2_t g_canary_p99; // P99 estimator for CANARY trial
// ============================================================================
// Helper Functions
// ============================================================================
// Read CLOCK_MONOTONIC and express the reading as a single nanosecond count.
static uint64_t get_time_ns(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const uint64_t whole_seconds_ns = (uint64_t)now.tv_sec * 1000000000ULL;
    const uint64_t fraction_ns = (uint64_t)now.tv_nsec;
    return whole_seconds_ns + fraction_ns;
}
// Seconds elapsed since g_uptime_start_ns was recorded (by hak_evo_init()).
static double get_uptime_sec(void) {
    const uint64_t elapsed_ns = get_time_ns() - g_uptime_start_ns;
    return (double)elapsed_ns / 1e9;
}
// Returns non-zero once at least cooldown_sec seconds have elapsed since the
// last mode switch (g_last_switch_ns).
static int cooldown_passed(void) {
    const uint64_t since_switch_ns = get_time_ns() - g_last_switch_ns;
    const double since_switch_sec = (double)since_switch_ns / 1e9;
    return since_switch_sec >= (double)g_config.cooldown_sec;
}
// Look up `name` in the environment.
// Returns the environment's value when the variable exists, otherwise
// `default_val` (which is passed through unchanged, even if NULL).
static const char* get_env_or_default(const char* name, const char* default_val) {
    const char* found = getenv(name);
    if (found == NULL) {
        return default_val;
    }
    return found;
}
// Translate the HAKMEM_EVO string into a policy value.
// Recognised values (case-sensitive): "learn", "frozen", "off".
// Any other string selects EVO_POLICY_AUTO.
static hak_evo_policy_t parse_evo_policy(const char* str) {
    static const struct {
        const char* keyword;
        hak_evo_policy_t policy;
    } known[] = {
        {"learn", EVO_POLICY_LEARN},
        {"frozen", EVO_POLICY_FROZEN},
        {"off", EVO_POLICY_OFF},
    };

    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
        if (strcmp(str, known[i].keyword) == 0) {
            return known[i].policy;
        }
    }
    return EVO_POLICY_AUTO; // Default
}
// Seed environment defaults for a named preset ("production", "debug" or
// "frozen"). Every variable is written with overwrite=0, so values that are
// already present in the environment win. Unknown preset names do nothing.
static void apply_preset(const char* preset) {
    struct env_default {
        const char* key;
        const char* value;
    };

    // Production: auto mode, minimal overhead
    static const struct env_default production_defaults[] = {
        {"HAKMEM_EVO", "auto"},
        {"HAKMEM_CANARY_FRAC", "0.05"},
        {"HAKMEM_THP", "auto"},
        {"HAKMEM_FREEZE_SEC", "180"},
        {"HAKMEM_RELEARN_DELTA", "0.20"},
        {NULL, NULL},
    };
    // Debug: force learning, more exploration, faster freeze for testing
    static const struct env_default debug_defaults[] = {
        {"HAKMEM_EVO", "learn"},
        {"HAKMEM_CANARY_FRAC", "0.10"},
        {"HAKMEM_FREEZE_SEC", "60"},
        {NULL, NULL},
    };
    // Frozen: production with learning disabled
    static const struct env_default frozen_defaults[] = {
        {"HAKMEM_EVO", "frozen"},
        {"HAKMEM_THP", "auto"},
        {NULL, NULL},
    };

    const struct env_default* entry = NULL;
    if (strcmp(preset, "production") == 0) {
        entry = production_defaults;
    } else if (strcmp(preset, "debug") == 0) {
        entry = debug_defaults;
    } else if (strcmp(preset, "frozen") == 0) {
        entry = frozen_defaults;
    }
    if (entry == NULL) {
        return;
    }

    for (; entry->key != NULL; entry++) {
        setenv(entry->key, entry->value, 0); // Don't override if already set
    }
}
// Skip trailing blanks so values such as "60 " or "60\n" are still accepted.
static const char* evo_skip_trailing_space(const char* p) {
    while (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r') {
        p++;
    }
    return p;
}

// Read a non-negative integer setting from the environment.
// Returns `fallback` (and warns on stderr) when the variable is set but is
// not a plain decimal integer in the range [0, 2147483647]. Returns
// `fallback` silently when the variable is unset.
static int evo_env_int(const char* name, int fallback) {
    const char* text = getenv(name);
    if (text == NULL) {
        return fallback;
    }

    char* end = NULL;
    long value = strtol(text, &end, 10);
    int well_formed = (end != text) && (*evo_skip_trailing_space(end) == '\0');
    // The explicit range check also rejects strtol's saturated results on
    // platforms where long is wider than 32 bits.
    if (!well_formed || value < 0 || value > 2147483647L) {
        fprintf(stderr, "[EVO] Ignoring invalid %s=\"%s\" (using default %d)\n",
                name, text, fallback);
        return fallback;
    }
    return (int)value;
}

// Read a floating-point setting from the environment.
// Returns `fallback` (and warns on stderr) when the variable is set but is
// not a well-formed, finite number. Returns `fallback` silently when unset.
static double evo_env_double(const char* name, double fallback) {
    const char* text = getenv(name);
    if (text == NULL) {
        return fallback;
    }

    char* end = NULL;
    double value = strtod(text, &end);
    int well_formed = (end != text) && (*evo_skip_trailing_space(end) == '\0');
    if (!well_formed || !isfinite(value)) {
        fprintf(stderr, "[EVO] Ignoring invalid %s=\"%s\" (using default %g)\n",
                name, text, fallback);
        return fallback;
    }
    return value;
}

// Initialize configuration from environment variables.
//
// If HAKMEM_PRESET is set, the preset first seeds defaults for variables that
// are not already present in the environment. Each setting is then read
// individually; unset or malformed values fall back to the built-in default
// instead of silently becoming 0 (which atoi/atof would produce).
static void init_config(void) {
    // Check for preset first
    const char* preset = getenv("HAKMEM_PRESET");
    if (preset) {
        apply_preset(preset);
    }

    // Policy
    const char* evo_str = get_env_or_default("HAKMEM_EVO", "auto");
    g_config.policy = parse_evo_policy(evo_str);

    // Freeze conditions
    g_config.freeze_time_sec = evo_env_int("HAKMEM_FREEZE_SEC", 180);
    g_config.freeze_epsilon = evo_env_double("HAKMEM_FREEZE_EPSILON", 0.01);
    g_config.freeze_window_count = evo_env_int("HAKMEM_FREEZE_WINDOWS", 3);

    // Re-learning triggers
    g_config.relearn_delta = evo_env_double("HAKMEM_RELEARN_DELTA", 0.20);
    g_config.relearn_L_windows = evo_env_int("HAKMEM_RELEARN_L", 3);
    g_config.dist_delta = evo_env_double("HAKMEM_DIST_DELTA", 0.25);

    // Window configuration
    g_config.window_ops = evo_env_int("HAKMEM_WINDOW_OPS", 10000);
    g_config.window_sec = evo_env_int("HAKMEM_WINDOW_SEC", 2);

    // CANARY configuration
    g_config.canary_frac = evo_env_double("HAKMEM_CANARY_FRAC", 0.05);
    g_config.canary_timeout_sec = evo_env_int("HAKMEM_CANARY_TIMEOUT", 60);

    // Cooldown
    g_config.cooldown_sec = evo_env_int("HAKMEM_COOLDOWN_SEC", 30);
}
// ============================================================================
// State Transition Logic
// ============================================================================
// Check if improvement in last W windows is < ε (convergence).
//
// W is freeze_window_count clamped to [2, history capacity]:
//   - at least two windows are needed to form one (previous, current) pair;
//     without the lower bound W == 1 divided by zero (NaN, never converged)
//     and W <= 0 reported convergence with no data at all;
//   - a W larger than the history ring could never be satisfied.
//
// For each adjacent pair in the last W snapshots the relative improvement is
// (prev - curr) / prev; pairs whose previous p99 is not positive contribute 0.
// Returns non-zero when the average over the W-1 pairs is below
// freeze_epsilon, and 0 when fewer than W snapshots have been recorded.
static int is_converged(void) {
    const int capacity = (int)(sizeof(g_p2_history) / sizeof(g_p2_history[0]));

    int W = (int)g_config.freeze_window_count;
    if (W < 2) {
        W = 2;
    }
    if (W > capacity) {
        W = capacity;
    }
    if (g_p2_history_count < W) {
        return 0; // Not enough history
    }

    // Calculate average improvement over last W windows
    const int first = g_p2_history_count - W;
    double sum_improvement = 0.0;
    for (int i = first; i < g_p2_history_count - 1; i++) {
        double p99_prev = hak_p2_get(&g_p2_history[i]);
        double p99_curr = hak_p2_get(&g_p2_history[i + 1]);
        if (p99_prev > 0.0) {
            sum_improvement += (p99_prev - p99_curr) / p99_prev;
        }
    }

    double avg_improvement = sum_improvement / (double)(W - 1);
    return avg_improvement < g_config.freeze_epsilon;
}
// Human-readable name of a lifecycle mode, for log output.
static const char* evo_mode_label(hak_evo_mode_t mode) {
    switch (mode) {
        case EVO_MODE_LEARN:  return "LEARN";
        case EVO_MODE_FROZEN: return "FROZEN";
        case EVO_MODE_CANARY: return "CANARY";
        default:              return "UNKNOWN";
    }
}

// Switch to new mode.
//
// No-op when the mode is unchanged. Otherwise publishes the new mode,
// restarts the cooldown timer and performs the mode-specific actions:
//   FROZEN: capture the current window's p99 and size distribution as the
//           baseline and count the freeze.
//   CANARY: log only.
//   LEARN:  count a CANARY success only when the previous mode really was
//           CANARY (a forced switch from FROZEN is not a canary acceptance).
// Log lines report the actual previous mode rather than an assumed one.
static void switch_mode(hak_evo_mode_t new_mode) {
    hak_evo_mode_t old_mode = atomic_load(&g_mode);
    if (old_mode == new_mode) return;

    atomic_store(&g_mode, new_mode);
    g_last_switch_ns = get_time_ns();

    const char* from = evo_mode_label(old_mode);

    // Handle mode-specific actions
    if (new_mode == EVO_MODE_FROZEN) {
        // NOTE(review): this also runs on a CANARY rollback, so the baseline
        // is replaced by the p99 of the window that failed the trial —
        // confirm that re-baselining on rollback is intended.
        g_baseline_p99 = hak_p2_get(&g_p2_current);
        hak_sizeclass_dist_copy(&g_dist_baseline, &g_dist_current);
        g_stats.freeze_count++;
        printf("[EVO] %s → FROZEN (baseline p99=%.2f ns, uptime=%.1fs)\n",
               from, g_baseline_p99, get_uptime_sec());
    } else if (new_mode == EVO_MODE_CANARY) {
        printf("[EVO] %s → CANARY (re-learning trigger detected)\n", from);
    } else if (new_mode == EVO_MODE_LEARN) {
        if (old_mode == EVO_MODE_CANARY) {
            printf("[EVO] CANARY → LEARN (candidate policy accepted)\n");
            g_stats.canary_success++;
        } else {
            printf("[EVO] %s → LEARN\n", from);
        }
    }
}
// State machine tick (called on window closure).
//
// Policy handling:
//   OFF    - no transitions at all.
//   LEARN  - mode is pinned to LEARN.
//   FROZEN - mode is pinned to FROZEN.
//   A forced policy returns immediately even when the mode already matches;
//   previously it fell through into the AUTO rules, which moved the mode away
//   again and made it oscillate between windows.
//
// AUTO policy transitions:
//   LEARN  -> FROZEN  when uptime >= freeze_time_sec, the p99 history has
//                     converged and the cooldown has elapsed.
//   FROZEN -> CANARY  when the cooldown has elapsed and either the p99
//                     degraded by >= relearn_delta for relearn_L_windows
//                     consecutive evaluated windows, or the size distribution
//                     L1 distance to the baseline is >= dist_delta.
//   CANARY -> LEARN   when the current window's p99 is below the baseline.
//   CANARY -> FROZEN  otherwise (rollback).
static void state_machine_tick(void) {
    hak_evo_mode_t mode = atomic_load(&g_mode);

    // Forced modes (policy override)
    if (g_config.policy == EVO_POLICY_OFF) {
        return; // No state transitions
    }
    if (g_config.policy == EVO_POLICY_LEARN) {
        if (mode != EVO_MODE_LEARN) {
            switch_mode(EVO_MODE_LEARN);
        }
        return;
    }
    if (g_config.policy == EVO_POLICY_FROZEN) {
        if (mode != EVO_MODE_FROZEN) {
            switch_mode(EVO_MODE_FROZEN);
        }
        return;
    }

    // AUTO policy: State machine logic
    switch (mode) {
        case EVO_MODE_LEARN:
            // LEARN → FROZEN conditions
            if (get_uptime_sec() >= (double)g_config.freeze_time_sec &&
                is_converged() &&
                cooldown_passed()) {
                switch_mode(EVO_MODE_FROZEN);
            }
            break;

        case EVO_MODE_FROZEN: {
            // FROZEN → CANARY conditions
            if (!cooldown_passed()) {
                break;
            }

            int relearn = 0;

            // Check degradation
            double p99_current = hak_p2_get(&g_p2_current);
            if (g_baseline_p99 > 0.0) {
                double degradation = (p99_current / g_baseline_p99) - 1.0;
                if (degradation >= g_config.relearn_delta) {
                    g_consecutive_bad_windows++;
                } else {
                    g_consecutive_bad_windows = 0;
                }
                if (g_consecutive_bad_windows >= g_config.relearn_L_windows) {
                    relearn = 1;
                }
            }

            // Check distribution change
            double dist_l1 = hak_sizeclass_dist_l1(&g_dist_current, &g_dist_baseline);
            if (dist_l1 >= g_config.dist_delta) {
                printf("[EVO] Distribution change detected (L1=%.3f >= %.3f)\n",
                       dist_l1, g_config.dist_delta);
                relearn = 1;
            }

            // Transition at most once per tick so relearn_count is not
            // incremented twice when both triggers fire together.
            if (relearn) {
                switch_mode(EVO_MODE_CANARY);
                g_consecutive_bad_windows = 0;
                g_stats.relearn_count++;
            }
            break;
        }

        case EVO_MODE_CANARY: {
            // CANARY → LEARN or FROZEN
            // Simplified: For now, accept candidate if p99 improved.
            // (No timeout is evaluated here; canary_timeout_sec is unused.)
            double p99_current = hak_p2_get(&g_p2_current);
            if (p99_current < g_baseline_p99) {
                // Improved! Accept and continue learning
                switch_mode(EVO_MODE_LEARN);
            } else {
                // No improvement → rollback
                printf("[EVO] CANARY → FROZEN (rollback, no improvement)\n");
                switch_mode(EVO_MODE_FROZEN);
                g_stats.canary_rollback++;
            }
            break;
        }

        default:
            // Unknown mode (e.g. set through hak_evo_force_mode): leave as is.
            break;
    }
}
// ============================================================================
// Public API Implementation
// ============================================================================
// One-time setup of the evolution lifecycle: loads the configuration from the
// environment, selects the starting mode, resets estimators, distributions,
// timers and statistics, then logs the effective policy. Calling it again
// while already initialized is a no-op.
void hak_evo_init(void) {
    if (g_initialized) {
        return;
    }

    init_config();

    // A forced FROZEN policy starts frozen; every other policy starts learning.
    const int start_mode =
        (g_config.policy == EVO_POLICY_FROZEN) ? EVO_MODE_FROZEN : EVO_MODE_LEARN;
    atomic_store(&g_mode, start_mode);

    // P² estimators: current window, history ring, CANARY trial (Step 5)
    hak_p2_init(&g_p2_current, 0.99);
    for (int slot = 0; slot < 10; ++slot) {
        hak_p2_init(&g_p2_history[slot], 0.99);
    }
    g_p2_history_count = 0;
    hak_p2_init(&g_canary_p99, 0.99);

    // Size distributions
    hak_sizeclass_dist_init(&g_dist_current);
    hak_sizeclass_dist_init(&g_dist_baseline);

    // Baseline, window and timing state: all timers start from one reading.
    const uint64_t start_ns = get_time_ns();
    g_baseline_p99 = INFINITY;
    g_window_ops_count = 0;
    g_window_start_ns = start_ns;
    g_uptime_start_ns = start_ns;
    g_last_switch_ns = start_ns;
    g_consecutive_bad_windows = 0;

    // Statistics
    memset(&g_stats, 0, sizeof(g_stats));

    g_initialized = 1;

    const char* policy_str[] = {"AUTO", "LEARN", "FROZEN", "OFF"};
    const char* policy_name = policy_str[g_config.policy];
    const unsigned long freeze_sec = (unsigned long)g_config.freeze_time_sec;
    const double relearn_pct = g_config.relearn_delta * 100.0;
    printf("[EVO] Initialized (policy=%s, freeze_sec=%lu, relearn_delta=%.2f%%)\n",
           policy_name, freeze_sec, relearn_pct);
}
// Print the final statistics and mark the module as uninitialized.
// Does nothing when the module was never initialized.
void hak_evo_shutdown(void) {
    if (g_initialized) {
        hak_evo_print_stats();
        g_initialized = 0;
    }
}
int hak_evo_tick(uint64_t now_ns) {
if (!g_initialized) hak_evo_init();
// Check window closure
uint64_t elapsed_ns = now_ns - g_window_start_ns;
double elapsed_sec = (double)elapsed_ns / 1e9;
int should_close = (g_window_ops_count >= g_config.window_ops) ||
(elapsed_sec >= (double)g_config.window_sec);
if (should_close) {
// Save current window to history
if (g_p2_history_count < 10) {
hak_p2_copy(&g_p2_history[g_p2_history_count], &g_p2_current);
g_p2_history_count++;
} else {
// Shift history
for (int i = 0; i < 9; i++) {
hak_p2_copy(&g_p2_history[i], &g_p2_history[i + 1]);
}
hak_p2_copy(&g_p2_history[9], &g_p2_current);
}
// Run state machine
state_machine_tick();
// Reset window
hak_p2_reset(&g_p2_current);
hak_sizeclass_dist_reset(&g_dist_current);
g_window_ops_count = 0;
g_window_start_ns = now_ns;
g_stats.windows_completed++;
return 1; // State may have changed
}
return 0;
}
// Feed one latency sample (in nanoseconds) into the current window's p99
// estimator. Also counts the sample towards the window's operation budget.
// Initializes the module on first use.
void hak_evo_record_latency(double latency_ns) {
    if (!g_initialized) {
        hak_evo_init();
    }

    hak_p2_add(&g_p2_current, latency_ns);
    g_stats.total_samples_latency += 1;
    g_window_ops_count += 1;
}
// Record one allocation size in the current window's size-class distribution.
// Initializes the module on first use.
void hak_evo_record_size(size_t size) {
    if (!g_initialized) {
        hak_evo_init();
    }

    hak_sizeclass_dist_record(&g_dist_current, size);
    g_stats.total_samples_size += 1;
}
// Current lifecycle mode (atomic, sequentially consistent read).
hak_evo_mode_t hak_evo_get_mode(void) {
    return atomic_load_explicit(&g_mode, memory_order_seq_cst);
}
// Returns 1 when the current mode is FROZEN, 0 otherwise.
int hak_evo_is_frozen(void) {
    const int mode_now = atomic_load(&g_mode);
    if (mode_now == EVO_MODE_FROZEN) {
        return 1;
    }
    return 0;
}
// Returns 1 when the current mode is CANARY, 0 otherwise.
int hak_evo_is_canary(void) {
    const int mode_now = atomic_load(&g_mode);
    if (mode_now == EVO_MODE_CANARY) {
        return 1;
    }
    return 0;
}
// p99 latency estimate of the window currently being collected.
double hak_evo_get_p99(void) {
    const double estimate = hak_p2_get(&g_p2_current);
    return estimate;
}
// Overwrite the current mode directly. Unlike switch_mode(), this performs no
// logging, baseline capture, statistics update or cooldown-timer reset.
void hak_evo_force_mode(hak_evo_mode_t mode) {
    atomic_store_explicit(&g_mode, mode, memory_order_seq_cst);
}
const hak_evo_config_t* hak_evo_get_config(void) {
return &g_config;
}
void hak_evo_print_stats(void) {
printf("\n========================================\n");
printf("Evolution Lifecycle Statistics\n");
printf("========================================\n");
const char* mode_str[] = {"LEARN", "FROZEN", "CANARY"};
printf("Current Mode: %s\n", mode_str[atomic_load(&g_mode)]);
printf("Uptime: %.1f sec\n", get_uptime_sec());
printf("Baseline p99: %.2f ns\n", g_baseline_p99);
printf("Current p99: %.2f ns\n", hak_evo_get_p99());
printf("\n");
printf("Samples (latency): %lu\n", (unsigned long)g_stats.total_samples_latency);
printf("Samples (size): %lu\n", (unsigned long)g_stats.total_samples_size);
printf("Windows completed: %lu\n", (unsigned long)g_stats.windows_completed);
printf("\n");
printf("Freeze count: %lu\n", (unsigned long)g_stats.freeze_count);
printf("Re-learn count: %lu\n", (unsigned long)g_stats.relearn_count);
printf("CANARY success: %lu\n", (unsigned long)g_stats.canary_success);
printf("CANARY rollback: %lu\n", (unsigned long)g_stats.canary_rollback);
printf("========================================\n");
}
// Helper: P² copy (not in hakmem_p2.h yet).
// Copies the whole estimator state from *src into *dst by value.
static void hak_p2_copy(hak_p2_t* dst, const hak_p2_t* src) {
    *dst = *src;
}
// ============================================================================
// CANARY Mode Implementation (Step 5)
// ============================================================================
// Identifier of the strategy currently treated as confirmed.
int hak_evo_get_confirmed_strategy(void) {
    const int confirmed = g_confirmed_strategy;
    return confirmed;
}
// Identifier of the strategy currently under trial.
int hak_evo_get_candidate_strategy(void) {
    const int candidate = g_candidate_strategy;
    return candidate;
}
int hak_evo_should_use_candidate(void) {
// 5% sampling: use candidate with 5% probability
// Simple approach: check if (rand() % 100 < 5)
// Thread-safe version using atomic counter
static _Atomic uint64_t counter = 0;
uint64_t sample_id = atomic_fetch_add(&counter, 1);
// Use modulo 20 for 5% (1 out of 20)
return (sample_id % 20) == 0;
}
// Record the latency (in nanoseconds) observed for one CANARY-sampled
// operation and count the sample.
// strategy_id is accepted but not used yet (could track per-strategy).
void hak_evo_record_canary_result(int strategy_id, double latency_ns) {
    (void)strategy_id;

    hak_p2_add(&g_canary_p99, latency_ns);
    atomic_fetch_add_explicit(&g_canary_sample_count, 1, memory_order_seq_cst);
}