Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
335 lines
11 KiB
C
335 lines
11 KiB
C
// hakmem_ucb1.c - UCB1 Bandit Implementation
|
||
// Purpose: Automatic policy evolution via reinforcement learning
|
||
//
|
||
// License: MIT
|
||
// Date: 2025-10-21
|
||
|
||
#include "hakmem.h"
|
||
#include <math.h>
|
||
#include <string.h>
|
||
#include <time.h>
|
||
#include <stdio.h>
|
||
|
||
// ============================================================================
|
||
// Configuration
|
||
// ============================================================================
|
||
|
||
// Exploration constant C in the UCB1 bonus term sqrt(C * ln(N) / n).
#define UCB1_EXPLORATION_FACTOR 2.0
// Minimum relative reward gain (0.08 == 8%) a candidate step must show.
#define HYSTERESIS_IMPROVE_PCT 0.08
// Back-to-back qualifying evaluations required before a step is adopted.
#define HYSTERESIS_CONSECUTIVE 3
// Seconds that must pass after an adoption before the next adjustment (3 min).
#define COOLDOWN_SECS 180
|
||
|
||
// ============================================================================
|
||
// Discrete Step Tables
|
||
// ============================================================================
|
||
|
||
// mmap_threshold steps (6 levels)
|
||
static const size_t MMAP_THRESHOLD_STEPS[STEP_COUNT] = {
|
||
64 * 1024, // 64KB
|
||
128 * 1024, // 128KB
|
||
256 * 1024, // 256KB
|
||
512 * 1024, // 512KB
|
||
1024 * 1024, // 1MB
|
||
2048 * 1024, // 2MB
|
||
};
|
||
|
||
// Convert step enum to bytes
|
||
static inline size_t step_to_bytes(MmapThresholdStep step) {
|
||
if (step >= STEP_COUNT) return MMAP_THRESHOLD_STEPS[STEP_COUNT - 1];
|
||
return MMAP_THRESHOLD_STEPS[step];
|
||
}
|
||
|
||
// ============================================================================
|
||
// UCB1 State (per call-site)
|
||
// ============================================================================
|
||
|
||
typedef struct {
|
||
// Per-step statistics
|
||
double avg_reward[STEP_COUNT]; // Average reward for each step
|
||
uint64_t step_trials[STEP_COUNT]; // Trial count for each step
|
||
|
||
// Current state
|
||
MmapThresholdStep current_step; // Currently active step
|
||
uint64_t total_trials; // Total trials across all steps
|
||
|
||
// Hysteresis (safety mechanism)
|
||
MmapThresholdStep candidate_step; // Candidate for next step
|
||
uint32_t consecutive_count; // Consecutive improvements
|
||
|
||
// Cooldown (stability mechanism)
|
||
uint64_t last_adoption_time_ms; // Last time we adopted a new step
|
||
} UCB1State;
|
||
|
||
// ============================================================================
|
||
// Global UCB1 State
|
||
// ============================================================================
|
||
|
||
// Per-site UCB1 state table (simplified; the PoC code in this file only uses slot 0).
static UCB1State g_ucb1_states[256];
// Evolution switch: 0 = baseline mode, non-zero = evolving. Starts at 0
// (static storage is zero-initialized).
static int g_evolution_enabled;
|
||
|
||
// ============================================================================
|
||
// KPI Measurement
|
||
// ============================================================================
|
||
|
||
// Get current timestamp in milliseconds
|
||
static uint64_t get_time_ms(void) {
|
||
struct timespec ts;
|
||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
|
||
}
|
||
|
||
// Map `value` linearly from [min, max] onto [0, 1].
//
// Inputs below `min` yield 0.0 and inputs above `max` yield 1.0, so the result
// always stays inside the documented [0, 1] range. When the interval is
// degenerate (max - min < 0.001) the midpoint 0.5 is returned instead of
// dividing by (nearly) zero.
static double normalize(double value, double min, double max) {
    double range = max - min;
    if (range < 0.001) {
        return 0.5;  // Avoid division by zero
    }

    double scaled = (value - min) / range;
    if (scaled < 0.0) {
        return 0.0;
    }
    if (scaled > 1.0) {
        return 1.0;
    }
    return scaled;
}
|
||
|
||
// Calculate reward from KPI
|
||
// Lower is better (negative reward for high latency/PF/RSS)
|
||
static double calculate_reward(const hak_kpi_t* kpi) {
|
||
// Normalize KPIs to [0, 1]
|
||
double norm_p99 = normalize((double)kpi->p99_alloc_ns, 0.0, 1000.0); // 0-1000ns
|
||
double norm_pf = normalize((double)kpi->hard_page_faults, 0.0, 100.0); // 0-100 faults
|
||
double norm_rss = normalize((double)kpi->rss_delta_mb, -10.0, 10.0); // -10MB ~ +10MB
|
||
|
||
// Reward = minimize cost
|
||
// P99 is most important (weight 1.0)
|
||
// Page Faults are moderately important (weight 0.5)
|
||
// RSS is less important (weight 0.2)
|
||
double reward = -(norm_p99 + 0.5 * norm_pf + 0.2 * norm_rss);
|
||
|
||
return reward;
|
||
}
|
||
|
||
// ============================================================================
|
||
// UCB1 Algorithm
|
||
// ============================================================================
|
||
|
||
// Calculate UCB1 score for a step
|
||
static double ucb1_score(
|
||
const UCB1State* state,
|
||
MmapThresholdStep step
|
||
) {
|
||
// If never tried, return infinity (prioritize exploration)
|
||
if (state->step_trials[step] == 0) {
|
||
return INFINITY;
|
||
}
|
||
|
||
// UCB1 formula: avg_reward + √(C × ln(N) / n)
|
||
// C = exploration factor (2.0)
|
||
// N = total trials
|
||
// n = trials for this step
|
||
double avg_reward = state->avg_reward[step];
|
||
double exploration_bonus = sqrt(
|
||
UCB1_EXPLORATION_FACTOR * log((double)state->total_trials) /
|
||
(double)state->step_trials[step]
|
||
);
|
||
|
||
return avg_reward + exploration_bonus;
|
||
}
|
||
|
||
// Select best step using UCB1 (only ±1 neighbors)
|
||
static MmapThresholdStep ucb1_select(const UCB1State* state) {
|
||
MmapThresholdStep current = state->current_step;
|
||
MmapThresholdStep best_step = current;
|
||
double best_score = ucb1_score(state, current);
|
||
|
||
// Try previous step (if exists)
|
||
if (current > 0) {
|
||
MmapThresholdStep prev = (MmapThresholdStep)(current - 1);
|
||
double score = ucb1_score(state, prev);
|
||
if (score > best_score) {
|
||
best_score = score;
|
||
best_step = prev;
|
||
}
|
||
}
|
||
|
||
// Try next step (if exists)
|
||
if (current < STEP_COUNT - 1) {
|
||
MmapThresholdStep next = (MmapThresholdStep)(current + 1);
|
||
double score = ucb1_score(state, next);
|
||
if (score > best_score) {
|
||
best_score = score;
|
||
best_step = next;
|
||
}
|
||
}
|
||
|
||
return best_step;
|
||
}
|
||
|
||
// Update statistics for a step
|
||
static void ucb1_update(UCB1State* state, MmapThresholdStep step, double reward) {
|
||
// Update running average
|
||
uint64_t n = state->step_trials[step];
|
||
if (n == 0) {
|
||
state->avg_reward[step] = reward;
|
||
} else {
|
||
// Incremental average: avg_new = (avg_old × n + reward) / (n + 1)
|
||
state->avg_reward[step] = (state->avg_reward[step] * n + reward) / (n + 1);
|
||
}
|
||
|
||
// Update counts
|
||
state->step_trials[step]++;
|
||
state->total_trials++;
|
||
}
|
||
|
||
// ============================================================================
|
||
// Hysteresis (Safety Mechanism)
|
||
// ============================================================================
|
||
|
||
static int hysteresis_should_adopt(
|
||
UCB1State* state,
|
||
MmapThresholdStep new_step,
|
||
double current_reward,
|
||
double new_reward
|
||
) {
|
||
// Calculate improvement percentage
|
||
double improvement = (new_reward - current_reward) / fabs(current_reward);
|
||
|
||
// Check if improvement is sufficient
|
||
if (improvement >= HYSTERESIS_IMPROVE_PCT) {
|
||
// Same candidate as before?
|
||
if (state->candidate_step == new_step) {
|
||
state->consecutive_count++;
|
||
} else {
|
||
// New candidate, reset counter
|
||
state->candidate_step = new_step;
|
||
state->consecutive_count = 1;
|
||
}
|
||
|
||
// Reached threshold?
|
||
if (state->consecutive_count >= HYSTERESIS_CONSECUTIVE) {
|
||
// Reset and adopt!
|
||
state->consecutive_count = 0;
|
||
state->candidate_step = (MmapThresholdStep)-1; // Invalid
|
||
return 1;
|
||
}
|
||
} else {
|
||
// Improvement insufficient, reset
|
||
state->consecutive_count = 0;
|
||
state->candidate_step = (MmapThresholdStep)-1;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
// ============================================================================
|
||
// Cooldown (Stability Mechanism)
|
||
// ============================================================================
|
||
|
||
static int cooldown_can_adjust(const UCB1State* state) {
|
||
if (state->last_adoption_time_ms == 0) {
|
||
return 1; // First time, always OK
|
||
}
|
||
|
||
uint64_t now = get_time_ms();
|
||
uint64_t elapsed_ms = now - state->last_adoption_time_ms;
|
||
uint64_t elapsed_secs = elapsed_ms / 1000;
|
||
|
||
return elapsed_secs >= COOLDOWN_SECS;
|
||
}
|
||
|
||
// Stamp the state with the current time, which starts a new cooldown window.
static void cooldown_record_adoption(UCB1State* state) {
    const uint64_t now_ms = get_time_ms();
    state->last_adoption_time_ms = now_ms;
}
|
||
|
||
// ============================================================================
|
||
// Evolution Cycle (Main Logic)
|
||
// ============================================================================
|
||
|
||
void hak_trigger_evolution(void) {
|
||
if (!g_evolution_enabled) {
|
||
return; // Evolution disabled
|
||
}
|
||
|
||
printf("\n[UCB1] Evolution cycle triggered\n");
|
||
|
||
// For PoC, we only evolve the first active site
|
||
// Real implementation would iterate all sites
|
||
UCB1State* state = &g_ucb1_states[0];
|
||
|
||
// 1. Check cooldown
|
||
if (!cooldown_can_adjust(state)) {
|
||
printf("[UCB1] Cooldown active, skipping evolution\n");
|
||
return;
|
||
}
|
||
|
||
// 2. Measure current KPI
|
||
hak_kpi_t kpi;
|
||
hak_get_kpi(&kpi);
|
||
|
||
// 3. Calculate reward
|
||
double reward = calculate_reward(&kpi);
|
||
printf("[UCB1] Current reward: %.3f (P99=%lu ns, PF=%lu)\n",
|
||
reward,
|
||
(unsigned long)kpi.p99_alloc_ns,
|
||
(unsigned long)kpi.hard_page_faults);
|
||
|
||
// 4. Update statistics for current step
|
||
ucb1_update(state, state->current_step, reward);
|
||
|
||
// 5. Select best step using UCB1
|
||
MmapThresholdStep best_step = ucb1_select(state);
|
||
|
||
printf("[UCB1] UCB1 selected step: %d (current: %d)\n",
|
||
best_step, state->current_step);
|
||
|
||
// 6. Check if different from current
|
||
if (best_step != state->current_step) {
|
||
// Get current reward (average of current step)
|
||
double current_reward = state->avg_reward[state->current_step];
|
||
double new_reward = state->avg_reward[best_step];
|
||
|
||
// 7. Hysteresis check
|
||
if (hysteresis_should_adopt(state, best_step, current_reward, new_reward)) {
|
||
printf("[UCB1] ✅ ADOPTING new step %d → %d (improvement: %.1f%%)\n",
|
||
state->current_step, best_step,
|
||
(new_reward - current_reward) / fabs(current_reward) * 100.0);
|
||
|
||
state->current_step = best_step;
|
||
cooldown_record_adoption(state);
|
||
} else {
|
||
printf("[UCB1] Hysteresis: need %d more consecutive improvements\n",
|
||
HYSTERESIS_CONSECUTIVE - state->consecutive_count);
|
||
}
|
||
}
|
||
|
||
printf("[UCB1] Current step: %d (%zu bytes)\n",
|
||
state->current_step, step_to_bytes(state->current_step));
|
||
}
|
||
|
||
// ============================================================================
|
||
// Public API
|
||
// ============================================================================
|
||
|
||
void hak_enable_evolution(int enable) {
|
||
g_evolution_enabled = enable;
|
||
|
||
if (enable) {
|
||
printf("[UCB1] Evolution ENABLED\n");
|
||
|
||
// Initialize UCB1 state (simplified: first site only)
|
||
UCB1State* state = &g_ucb1_states[0];
|
||
memset(state, 0, sizeof(UCB1State));
|
||
state->current_step = STEP_256KB; // Start at 256KB (reasonable default)
|
||
state->candidate_step = (MmapThresholdStep)-1; // Invalid
|
||
} else {
|
||
printf("[UCB1] Evolution DISABLED (baseline mode)\n");
|
||
}
|
||
}
|
||
|
||
// Get current step for a site (simplified: always site 0)
|
||
MmapThresholdStep hak_ucb1_get_step(void) {
|
||
if (!g_evolution_enabled) {
|
||
return STEP_256KB; // Baseline default
|
||
}
|
||
return g_ucb1_states[0].current_step;
|
||
}
|
||
|
||
// Return the active mmap threshold in bytes, i.e. the byte size of the step
// reported by hak_ucb1_get_step().
size_t hak_ucb1_get_threshold(void) {
    return step_to_bytes(hak_ucb1_get_step());
}
|