335 lines
11 KiB
C
335 lines
11 KiB
C
|
|
// hakmem_ucb1.c - UCB1 Bandit Implementation
|
|||
|
|
// Purpose: Automatic policy evolution via reinforcement learning
|
|||
|
|
//
|
|||
|
|
// License: MIT
|
|||
|
|
// Date: 2025-10-21
|
|||
|
|
|
|||
|
|
#include "hakmem.h"
|
|||
|
|
#include <math.h>
|
|||
|
|
#include <string.h>
|
|||
|
|
#include <time.h>
|
|||
|
|
#include <stdio.h>
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Configuration
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// Constant C inside the UCB1 exploration bonus sqrt(C * ln(N) / n).
#define UCB1_EXPLORATION_FACTOR 2.0

// Smallest relative reward gain (0.08 = 8%) that counts as an improvement.
#define HYSTERESIS_IMPROVE_PCT 0.08

// How many qualifying evaluations in a row a candidate needs before adoption.
#define HYSTERESIS_CONSECUTIVE 3

// Minimum number of seconds between two adoptions (3 minutes).
#define COOLDOWN_SECS (3 * 60)
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Discrete Step Tables
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// mmap_threshold steps (6 levels)
|
|||
|
|
static const size_t MMAP_THRESHOLD_STEPS[STEP_COUNT] = {
|
|||
|
|
64 * 1024, // 64KB
|
|||
|
|
128 * 1024, // 128KB
|
|||
|
|
256 * 1024, // 256KB
|
|||
|
|
512 * 1024, // 512KB
|
|||
|
|
1024 * 1024, // 1MB
|
|||
|
|
2048 * 1024, // 2MB
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
// Convert step enum to bytes
|
|||
|
|
static inline size_t step_to_bytes(MmapThresholdStep step) {
|
|||
|
|
if (step >= STEP_COUNT) return MMAP_THRESHOLD_STEPS[STEP_COUNT - 1];
|
|||
|
|
return MMAP_THRESHOLD_STEPS[step];
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// UCB1 State (per call-site)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
typedef struct {
|
|||
|
|
// Per-step statistics
|
|||
|
|
double avg_reward[STEP_COUNT]; // Average reward for each step
|
|||
|
|
uint64_t step_trials[STEP_COUNT]; // Trial count for each step
|
|||
|
|
|
|||
|
|
// Current state
|
|||
|
|
MmapThresholdStep current_step; // Currently active step
|
|||
|
|
uint64_t total_trials; // Total trials across all steps
|
|||
|
|
|
|||
|
|
// Hysteresis (safety mechanism)
|
|||
|
|
MmapThresholdStep candidate_step; // Candidate for next step
|
|||
|
|
uint32_t consecutive_count; // Consecutive improvements
|
|||
|
|
|
|||
|
|
// Cooldown (stability mechanism)
|
|||
|
|
uint64_t last_adoption_time_ms; // Last time we adopted a new step
|
|||
|
|
} UCB1State;
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Global UCB1 State
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// Per-site UCB1 state table (simplified). The code in this file only ever
// touches slot 0.
static UCB1State g_ucb1_states[256];

// Evolution switch: 0 = baseline (fixed default step), non-zero = evolving.
static int g_evolution_enabled = 0;
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// KPI Measurement
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// Get current timestamp in milliseconds
|
|||
|
|
static uint64_t get_time_ms(void) {
|
|||
|
|
struct timespec ts;
|
|||
|
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|||
|
|
return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Linearly rescale `value` from the interval [min, max] onto [0, 1].
//
// value: sample to rescale.
// min:   input that maps to 0.
// max:   input that maps to 1.
// Returns (value - min) / (max - min), clamped to [0, 1] so that a sample
// outside the expected interval cannot produce an unbounded result. When the
// interval is narrower than 0.001 (this also covers max <= min) the midpoint
// 0.5 is returned instead of dividing by a near-zero range.
static double normalize(double value, double min, double max) {
    const double range = max - min;
    if (range < 0.001) {
        return 0.5;  // Degenerate interval: avoid division by (almost) zero
    }

    const double scaled = (value - min) / range;
    if (scaled < 0.0) {
        return 0.0;
    }
    if (scaled > 1.0) {
        return 1.0;
    }
    return scaled;
}
|
|||
|
|
|
|||
|
|
// Calculate reward from KPI
|
|||
|
|
// Lower is better (negative reward for high latency/PF/RSS)
|
|||
|
|
static double calculate_reward(const hak_kpi_t* kpi) {
|
|||
|
|
// Normalize KPIs to [0, 1]
|
|||
|
|
double norm_p99 = normalize((double)kpi->p99_alloc_ns, 0.0, 1000.0); // 0-1000ns
|
|||
|
|
double norm_pf = normalize((double)kpi->hard_page_faults, 0.0, 100.0); // 0-100 faults
|
|||
|
|
double norm_rss = normalize((double)kpi->rss_delta_mb, -10.0, 10.0); // -10MB ~ +10MB
|
|||
|
|
|
|||
|
|
// Reward = minimize cost
|
|||
|
|
// P99 is most important (weight 1.0)
|
|||
|
|
// Page Faults are moderately important (weight 0.5)
|
|||
|
|
// RSS is less important (weight 0.2)
|
|||
|
|
double reward = -(norm_p99 + 0.5 * norm_pf + 0.2 * norm_rss);
|
|||
|
|
|
|||
|
|
return reward;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// UCB1 Algorithm
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
// Calculate UCB1 score for a step
|
|||
|
|
static double ucb1_score(
|
|||
|
|
const UCB1State* state,
|
|||
|
|
MmapThresholdStep step
|
|||
|
|
) {
|
|||
|
|
// If never tried, return infinity (prioritize exploration)
|
|||
|
|
if (state->step_trials[step] == 0) {
|
|||
|
|
return INFINITY;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// UCB1 formula: avg_reward + √(C × ln(N) / n)
|
|||
|
|
// C = exploration factor (2.0)
|
|||
|
|
// N = total trials
|
|||
|
|
// n = trials for this step
|
|||
|
|
double avg_reward = state->avg_reward[step];
|
|||
|
|
double exploration_bonus = sqrt(
|
|||
|
|
UCB1_EXPLORATION_FACTOR * log((double)state->total_trials) /
|
|||
|
|
(double)state->step_trials[step]
|
|||
|
|
);
|
|||
|
|
|
|||
|
|
return avg_reward + exploration_bonus;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Select best step using UCB1 (only ±1 neighbors)
|
|||
|
|
static MmapThresholdStep ucb1_select(const UCB1State* state) {
|
|||
|
|
MmapThresholdStep current = state->current_step;
|
|||
|
|
MmapThresholdStep best_step = current;
|
|||
|
|
double best_score = ucb1_score(state, current);
|
|||
|
|
|
|||
|
|
// Try previous step (if exists)
|
|||
|
|
if (current > 0) {
|
|||
|
|
MmapThresholdStep prev = (MmapThresholdStep)(current - 1);
|
|||
|
|
double score = ucb1_score(state, prev);
|
|||
|
|
if (score > best_score) {
|
|||
|
|
best_score = score;
|
|||
|
|
best_step = prev;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Try next step (if exists)
|
|||
|
|
if (current < STEP_COUNT - 1) {
|
|||
|
|
MmapThresholdStep next = (MmapThresholdStep)(current + 1);
|
|||
|
|
double score = ucb1_score(state, next);
|
|||
|
|
if (score > best_score) {
|
|||
|
|
best_score = score;
|
|||
|
|
best_step = next;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return best_step;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Update statistics for a step
|
|||
|
|
static void ucb1_update(UCB1State* state, MmapThresholdStep step, double reward) {
|
|||
|
|
// Update running average
|
|||
|
|
uint64_t n = state->step_trials[step];
|
|||
|
|
if (n == 0) {
|
|||
|
|
state->avg_reward[step] = reward;
|
|||
|
|
} else {
|
|||
|
|
// Incremental average: avg_new = (avg_old × n + reward) / (n + 1)
|
|||
|
|
state->avg_reward[step] = (state->avg_reward[step] * n + reward) / (n + 1);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Update counts
|
|||
|
|
state->step_trials[step]++;
|
|||
|
|
state->total_trials++;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Hysteresis (Safety Mechanism)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
static int hysteresis_should_adopt(
|
|||
|
|
UCB1State* state,
|
|||
|
|
MmapThresholdStep new_step,
|
|||
|
|
double current_reward,
|
|||
|
|
double new_reward
|
|||
|
|
) {
|
|||
|
|
// Calculate improvement percentage
|
|||
|
|
double improvement = (new_reward - current_reward) / fabs(current_reward);
|
|||
|
|
|
|||
|
|
// Check if improvement is sufficient
|
|||
|
|
if (improvement >= HYSTERESIS_IMPROVE_PCT) {
|
|||
|
|
// Same candidate as before?
|
|||
|
|
if (state->candidate_step == new_step) {
|
|||
|
|
state->consecutive_count++;
|
|||
|
|
} else {
|
|||
|
|
// New candidate, reset counter
|
|||
|
|
state->candidate_step = new_step;
|
|||
|
|
state->consecutive_count = 1;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Reached threshold?
|
|||
|
|
if (state->consecutive_count >= HYSTERESIS_CONSECUTIVE) {
|
|||
|
|
// Reset and adopt!
|
|||
|
|
state->consecutive_count = 0;
|
|||
|
|
state->candidate_step = (MmapThresholdStep)-1; // Invalid
|
|||
|
|
return 1;
|
|||
|
|
}
|
|||
|
|
} else {
|
|||
|
|
// Improvement insufficient, reset
|
|||
|
|
state->consecutive_count = 0;
|
|||
|
|
state->candidate_step = (MmapThresholdStep)-1;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Cooldown (Stability Mechanism)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
static int cooldown_can_adjust(const UCB1State* state) {
|
|||
|
|
if (state->last_adoption_time_ms == 0) {
|
|||
|
|
return 1; // First time, always OK
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
uint64_t now = get_time_ms();
|
|||
|
|
uint64_t elapsed_ms = now - state->last_adoption_time_ms;
|
|||
|
|
uint64_t elapsed_secs = elapsed_ms / 1000;
|
|||
|
|
|
|||
|
|
return elapsed_secs >= COOLDOWN_SECS;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Stamp `state` with the current time as the moment of its latest adoption.
static void cooldown_record_adoption(UCB1State* state) {
    const uint64_t now_ms = get_time_ms();
    state->last_adoption_time_ms = now_ms;
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Evolution Cycle (Main Logic)
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
void hak_trigger_evolution(void) {
|
|||
|
|
if (!g_evolution_enabled) {
|
|||
|
|
return; // Evolution disabled
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
printf("\n[UCB1] Evolution cycle triggered\n");
|
|||
|
|
|
|||
|
|
// For PoC, we only evolve the first active site
|
|||
|
|
// Real implementation would iterate all sites
|
|||
|
|
UCB1State* state = &g_ucb1_states[0];
|
|||
|
|
|
|||
|
|
// 1. Check cooldown
|
|||
|
|
if (!cooldown_can_adjust(state)) {
|
|||
|
|
printf("[UCB1] Cooldown active, skipping evolution\n");
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// 2. Measure current KPI
|
|||
|
|
hak_kpi_t kpi;
|
|||
|
|
hak_get_kpi(&kpi);
|
|||
|
|
|
|||
|
|
// 3. Calculate reward
|
|||
|
|
double reward = calculate_reward(&kpi);
|
|||
|
|
printf("[UCB1] Current reward: %.3f (P99=%lu ns, PF=%lu)\n",
|
|||
|
|
reward,
|
|||
|
|
(unsigned long)kpi.p99_alloc_ns,
|
|||
|
|
(unsigned long)kpi.hard_page_faults);
|
|||
|
|
|
|||
|
|
// 4. Update statistics for current step
|
|||
|
|
ucb1_update(state, state->current_step, reward);
|
|||
|
|
|
|||
|
|
// 5. Select best step using UCB1
|
|||
|
|
MmapThresholdStep best_step = ucb1_select(state);
|
|||
|
|
|
|||
|
|
printf("[UCB1] UCB1 selected step: %d (current: %d)\n",
|
|||
|
|
best_step, state->current_step);
|
|||
|
|
|
|||
|
|
// 6. Check if different from current
|
|||
|
|
if (best_step != state->current_step) {
|
|||
|
|
// Get current reward (average of current step)
|
|||
|
|
double current_reward = state->avg_reward[state->current_step];
|
|||
|
|
double new_reward = state->avg_reward[best_step];
|
|||
|
|
|
|||
|
|
// 7. Hysteresis check
|
|||
|
|
if (hysteresis_should_adopt(state, best_step, current_reward, new_reward)) {
|
|||
|
|
printf("[UCB1] ✅ ADOPTING new step %d → %d (improvement: %.1f%%)\n",
|
|||
|
|
state->current_step, best_step,
|
|||
|
|
(new_reward - current_reward) / fabs(current_reward) * 100.0);
|
|||
|
|
|
|||
|
|
state->current_step = best_step;
|
|||
|
|
cooldown_record_adoption(state);
|
|||
|
|
} else {
|
|||
|
|
printf("[UCB1] Hysteresis: need %d more consecutive improvements\n",
|
|||
|
|
HYSTERESIS_CONSECUTIVE - state->consecutive_count);
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
printf("[UCB1] Current step: %d (%zu bytes)\n",
|
|||
|
|
state->current_step, step_to_bytes(state->current_step));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// ============================================================================
|
|||
|
|
// Public API
|
|||
|
|
// ============================================================================
|
|||
|
|
|
|||
|
|
void hak_enable_evolution(int enable) {
|
|||
|
|
g_evolution_enabled = enable;
|
|||
|
|
|
|||
|
|
if (enable) {
|
|||
|
|
printf("[UCB1] Evolution ENABLED\n");
|
|||
|
|
|
|||
|
|
// Initialize UCB1 state (simplified: first site only)
|
|||
|
|
UCB1State* state = &g_ucb1_states[0];
|
|||
|
|
memset(state, 0, sizeof(UCB1State));
|
|||
|
|
state->current_step = STEP_256KB; // Start at 256KB (reasonable default)
|
|||
|
|
state->candidate_step = (MmapThresholdStep)-1; // Invalid
|
|||
|
|
} else {
|
|||
|
|
printf("[UCB1] Evolution DISABLED (baseline mode)\n");
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Get current step for a site (simplified: always site 0)
|
|||
|
|
MmapThresholdStep hak_ucb1_get_step(void) {
|
|||
|
|
if (!g_evolution_enabled) {
|
|||
|
|
return STEP_256KB; // Baseline default
|
|||
|
|
}
|
|||
|
|
return g_ucb1_states[0].current_step;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
// Return the mmap threshold currently in effect, in bytes: the step reported
// by hak_ucb1_get_step() translated through step_to_bytes().
size_t hak_ucb1_get_threshold(void) {
    return step_to_bytes(hak_ucb1_get_step());
}
|