Files
hakmem/core/hakmem_ucb1.c

335 lines
11 KiB
C
Raw Normal View History

// hakmem_ucb1.c - UCB1 Bandit Implementation
// Purpose: Automatic policy evolution via reinforcement learning
//
// License: MIT
// Date: 2025-10-21
#include "hakmem.h"
#include <math.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
// ============================================================================
// Configuration
// ============================================================================
// Exploration constant C in the UCB1 bonus term sqrt(C * ln(N) / n), where
// N is the total trial count and n is the trial count of the scored step.
#define UCB1_EXPLORATION_FACTOR 2.0 // √(2 × ln(N) / n)
// Minimum relative gain in average reward (0.08 = 8%) that a candidate step
// must show over the current step to count toward adoption.
#define HYSTERESIS_IMPROVE_PCT 0.08 // 8% improvement required
// Number of qualifying evaluations of the same candidate step required
// before that candidate is adopted.
#define HYSTERESIS_CONSECUTIVE 3 // 3 consecutive improvements
// Seconds that must elapse after an adoption before another evolution cycle
// is allowed to run (see cooldown_can_adjust()).
#define COOLDOWN_SECS 180 // 3 minutes cooldown
// ============================================================================
// Discrete Step Tables
// ============================================================================
// mmap threshold in bytes for each discrete step, indexed by
// MmapThresholdStep. Six values are listed, doubling from 64KB to 2MB.
// NOTE(review): the initializer assumes STEP_COUNT (declared outside this
// file) is 6 — confirm. If STEP_COUNT were larger, the remaining entries
// would be zero-initialized.
static const size_t MMAP_THRESHOLD_STEPS[STEP_COUNT] = {
64 * 1024, // 64KB
128 * 1024, // 128KB
256 * 1024, // 256KB
512 * 1024, // 512KB
1024 * 1024, // 1MB
2048 * 1024, // 2MB
};
// Convert a step enum value to its mmap threshold in bytes.
//
// Any value outside [0, STEP_COUNT) yields the last table entry, so the
// lookup never reads outside MMAP_THRESHOLD_STEPS.
static inline size_t step_to_bytes(MmapThresholdStep step) {
    // Compare as size_t: the integer type underlying an enum is
    // implementation-defined, so a negative value (such as the
    // (MmapThresholdStep)-1 "invalid" marker used in this file) may be a
    // genuinely negative index. Converting to size_t turns it into a huge
    // value that is caught by the same upper-bound check.
    if ((size_t)step >= (size_t)STEP_COUNT) {
        return MMAP_THRESHOLD_STEPS[STEP_COUNT - 1];
    }
    return MMAP_THRESHOLD_STEPS[step];
}
// ============================================================================
// UCB1 State (per call-site)
// ============================================================================
// Learning state for one call-site: per-step reward statistics plus the
// hysteresis and cooldown bookkeeping that gate changes of the active step.
typedef struct {
// Per-step statistics
double avg_reward[STEP_COUNT]; // Running mean of the rewards recorded for each step
uint64_t step_trials[STEP_COUNT]; // Number of rewards recorded for each step
// Current state
MmapThresholdStep current_step; // Currently active step
uint64_t total_trials; // Total trials across all steps (incremented together with step_trials)
// Hysteresis (safety mechanism)
MmapThresholdStep candidate_step; // Step currently building a streak; (MmapThresholdStep)-1 when none
uint32_t consecutive_count; // Length of the candidate's streak of sufficient improvements
// Cooldown (stability mechanism)
uint64_t last_adoption_time_ms; // get_time_ms() value at the last adoption; 0 is treated as "never adopted"
} UCB1State;
// ============================================================================
// Global UCB1 State
// ============================================================================
// Table of per-site UCB1 states. Only element 0 is read or written by the
// code in this file (simplified single-site implementation).
static UCB1State g_ucb1_states[256]; // Per-site UCB1 state (simplified)
// Evolution switch, set by hak_enable_evolution(). While it is 0,
// hak_trigger_evolution() returns immediately and hak_ucb1_get_step()
// reports the STEP_256KB baseline.
static int g_evolution_enabled = 0; // 0 = baseline, 1 = evolving
// ============================================================================
// KPI Measurement
// ============================================================================
// Read CLOCK_MONOTONIC and return the reading in whole milliseconds.
// The result of clock_gettime() is not checked.
static uint64_t get_time_ms(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const uint64_t ms_from_seconds = (uint64_t)now.tv_sec * 1000;
    const uint64_t ms_from_nanos = (uint64_t)now.tv_nsec / 1000000;
    return ms_from_seconds + ms_from_nanos;
}
// Linearly rescale `value` so that `min` maps to 0 and `max` maps to 1.
//
// The result is not clamped: inputs outside [min, max] produce results
// outside [0, 1]. When the span (max - min) is below 0.001 — including zero
// or negative spans — the midpoint 0.5 is returned instead of dividing.
static double normalize(double value, double min, double max) {
    const double span = max - min;
    return (span < 0.001) ? 0.5 : (value - min) / span;
}
// Turn a KPI snapshot into a scalar reward; higher reward means lower cost.
//
// Each KPI is rescaled with normalize() against a fixed reference range:
//   p99_alloc_ns      0 .. 1000   (weight 1.0)
//   hard_page_faults  0 .. 100    (weight 0.5)
//   rss_delta_mb      -10 .. +10  (weight 0.2)
// The reward is the negated weighted sum. Because normalize() does not clamp,
// KPIs outside their reference range contribute proportionally more (or less).
static double calculate_reward(const hak_kpi_t* kpi) {
    const double latency_cost = normalize((double)kpi->p99_alloc_ns, 0.0, 1000.0);
    const double fault_cost = normalize((double)kpi->hard_page_faults, 0.0, 100.0);
    const double rss_cost = normalize((double)kpi->rss_delta_mb, -10.0, 10.0);

    return -(latency_cost + 0.5 * fault_cost + 0.2 * rss_cost);
}
// ============================================================================
// UCB1 Algorithm
// ============================================================================
// UCB1 score of `step`: its average reward plus an exploration bonus.
//
//   score = avg_reward[step] + √(C × ln(N) / n)
//     C = UCB1_EXPLORATION_FACTOR
//     N = state->total_trials
//     n = state->step_trials[step]
//
// A step with no trials scores INFINITY so that it is always preferred over
// any step that has been tried.
static double ucb1_score(
    const UCB1State* state,
    MmapThresholdStep step
) {
    const uint64_t pulls = state->step_trials[step];
    if (pulls == 0) {
        return INFINITY;
    }

    const double log_total = log((double)state->total_trials);
    const double bonus = sqrt(UCB1_EXPLORATION_FACTOR * log_total / (double)pulls);
    return state->avg_reward[step] + bonus;
}
// Select best step using UCB1 (only ±1 neighbors)
static MmapThresholdStep ucb1_select(const UCB1State* state) {
MmapThresholdStep current = state->current_step;
MmapThresholdStep best_step = current;
double best_score = ucb1_score(state, current);
// Try previous step (if exists)
if (current > 0) {
MmapThresholdStep prev = (MmapThresholdStep)(current - 1);
double score = ucb1_score(state, prev);
if (score > best_score) {
best_score = score;
best_step = prev;
}
}
// Try next step (if exists)
if (current < STEP_COUNT - 1) {
MmapThresholdStep next = (MmapThresholdStep)(current + 1);
double score = ucb1_score(state, next);
if (score > best_score) {
best_score = score;
best_step = next;
}
}
return best_step;
}
// Record one observed `reward` for `step`: fold it into that step's running
// mean and bump both the per-step and the total trial counters.
static void ucb1_update(UCB1State* state, MmapThresholdStep step, double reward) {
    double* mean = &state->avg_reward[step];
    const uint64_t seen = state->step_trials[step];

    // First sample becomes the mean; afterwards
    // mean_new = (mean_old × seen + reward) / (seen + 1).
    *mean = (seen == 0) ? reward : (*mean * seen + reward) / (seen + 1);

    state->step_trials[step] = seen + 1;
    state->total_trials += 1;
}
// ============================================================================
// Hysteresis (Safety Mechanism)
// ============================================================================
// Decide whether `new_step` has beaten the current step often enough to be
// adopted.
//
// An evaluation qualifies when the relative gain
//   (new_reward - current_reward) / |current_reward|
// is at least HYSTERESIS_IMPROVE_PCT. Qualifying evaluations of the same
// candidate extend a streak stored in `state`; a different candidate restarts
// the streak at 1; a non-qualifying evaluation clears it.
//
// Returns 1 (and clears the streak) once the streak reaches
// HYSTERESIS_CONSECUTIVE, otherwise 0.
static int hysteresis_should_adopt(
    UCB1State* state,
    MmapThresholdStep new_step,
    double current_reward,
    double new_reward
) {
    const double gain = new_reward - current_reward;
    const double baseline = fabs(current_reward);

    // A zero baseline makes the relative gain undefined. Rather than divide
    // by zero and depend on inf/NaN comparison rules, decide explicitly:
    // any strict gain over a zero baseline qualifies, anything else does not.
    int sufficient;
    if (baseline > 0.0) {
        sufficient = (gain / baseline) >= HYSTERESIS_IMPROVE_PCT;
    } else {
        sufficient = gain > 0.0;
    }

    if (!sufficient) {
        // Improvement insufficient: drop any streak in progress.
        state->consecutive_count = 0;
        state->candidate_step = (MmapThresholdStep)-1; // Invalid
        return 0;
    }

    if (state->candidate_step == new_step) {
        state->consecutive_count++;
    } else {
        // New candidate, restart the streak.
        state->candidate_step = new_step;
        state->consecutive_count = 1;
    }

    if (state->consecutive_count >= HYSTERESIS_CONSECUTIVE) {
        // Streak complete: clear it and tell the caller to adopt.
        state->consecutive_count = 0;
        state->candidate_step = (MmapThresholdStep)-1; // Invalid
        return 1;
    }
    return 0;
}
// ============================================================================
// Cooldown (Stability Mechanism)
// ============================================================================
// Return non-zero when the cooldown allows another adjustment: either no
// adoption has been recorded yet (timestamp 0), or at least COOLDOWN_SECS
// whole seconds have passed since the recorded adoption.
static int cooldown_can_adjust(const UCB1State* state) {
    const uint64_t adopted_at_ms = state->last_adoption_time_ms;

    if (adopted_at_ms != 0) {
        const uint64_t waited_ms = get_time_ms() - adopted_at_ms;
        const uint64_t waited_secs = waited_ms / 1000;
        return waited_secs >= COOLDOWN_SECS;
    }

    return 1; // Nothing adopted so far
}
// Stamp `state` with the current get_time_ms() reading, starting a new
// cooldown period.
static void cooldown_record_adoption(UCB1State* state) {
    const uint64_t stamp_ms = get_time_ms();
    state->last_adoption_time_ms = stamp_ms;
}
// ============================================================================
// Evolution Cycle (Main Logic)
// ============================================================================
void hak_trigger_evolution(void) {
if (!g_evolution_enabled) {
return; // Evolution disabled
}
printf("\n[UCB1] Evolution cycle triggered\n");
// For PoC, we only evolve the first active site
// Real implementation would iterate all sites
UCB1State* state = &g_ucb1_states[0];
// 1. Check cooldown
if (!cooldown_can_adjust(state)) {
printf("[UCB1] Cooldown active, skipping evolution\n");
return;
}
// 2. Measure current KPI
hak_kpi_t kpi;
hak_get_kpi(&kpi);
// 3. Calculate reward
double reward = calculate_reward(&kpi);
printf("[UCB1] Current reward: %.3f (P99=%lu ns, PF=%lu)\n",
reward,
(unsigned long)kpi.p99_alloc_ns,
(unsigned long)kpi.hard_page_faults);
// 4. Update statistics for current step
ucb1_update(state, state->current_step, reward);
// 5. Select best step using UCB1
MmapThresholdStep best_step = ucb1_select(state);
printf("[UCB1] UCB1 selected step: %d (current: %d)\n",
best_step, state->current_step);
// 6. Check if different from current
if (best_step != state->current_step) {
// Get current reward (average of current step)
double current_reward = state->avg_reward[state->current_step];
double new_reward = state->avg_reward[best_step];
// 7. Hysteresis check
if (hysteresis_should_adopt(state, best_step, current_reward, new_reward)) {
printf("[UCB1] ✅ ADOPTING new step %d → %d (improvement: %.1f%%)\n",
state->current_step, best_step,
(new_reward - current_reward) / fabs(current_reward) * 100.0);
state->current_step = best_step;
cooldown_record_adoption(state);
} else {
printf("[UCB1] Hysteresis: need %d more consecutive improvements\n",
HYSTERESIS_CONSECUTIVE - state->consecutive_count);
}
}
printf("[UCB1] Current step: %d (%zu bytes)\n",
state->current_step, step_to_bytes(state->current_step));
}
// ============================================================================
// Public API
// ============================================================================
// Switch evolution on (non-zero `enable`) or off (zero) and log the change.
//
// Enabling also resets the state of site 0: all statistics are zeroed, the
// active step becomes STEP_256KB and no hysteresis candidate is set.
// Disabling leaves the stored state untouched.
void hak_enable_evolution(int enable) {
    g_evolution_enabled = enable;

    if (!enable) {
        printf("[UCB1] Evolution DISABLED (baseline mode)\n");
        return;
    }

    printf("[UCB1] Evolution ENABLED\n");

    // Simplified: only the first site is initialized.
    UCB1State* site = &g_ucb1_states[0];
    memset(site, 0, sizeof(UCB1State));
    site->current_step = STEP_256KB;              // Starting point
    site->candidate_step = (MmapThresholdStep)-1; // Invalid: no candidate yet
}
// Report the step in effect: the learned step of site 0 while evolution is
// enabled, otherwise the fixed STEP_256KB baseline.
MmapThresholdStep hak_ucb1_get_step(void) {
    return g_evolution_enabled ? g_ucb1_states[0].current_step
                               : STEP_256KB;
}
// Report the mmap threshold, in bytes, that corresponds to the step returned
// by hak_ucb1_get_step().
size_t hak_ucb1_get_threshold(void) {
    return step_to_bytes(hak_ucb1_get_step());
}