// hakmem_ucb1.c - UCB1 Bandit Implementation // Purpose: Automatic policy evolution via reinforcement learning // // License: MIT // Date: 2025-10-21 #include "hakmem.h" #include #include #include #include // ============================================================================ // Configuration // ============================================================================ #define UCB1_EXPLORATION_FACTOR 2.0 // √(2 × ln(N) / n) #define HYSTERESIS_IMPROVE_PCT 0.08 // 8% improvement required #define HYSTERESIS_CONSECUTIVE 3 // 3 consecutive improvements #define COOLDOWN_SECS 180 // 3 minutes cooldown // ============================================================================ // Discrete Step Tables // ============================================================================ // mmap_threshold steps (6 levels) static const size_t MMAP_THRESHOLD_STEPS[STEP_COUNT] = { 64 * 1024, // 64KB 128 * 1024, // 128KB 256 * 1024, // 256KB 512 * 1024, // 512KB 1024 * 1024, // 1MB 2048 * 1024, // 2MB }; // Convert step enum to bytes static inline size_t step_to_bytes(MmapThresholdStep step) { if (step >= STEP_COUNT) return MMAP_THRESHOLD_STEPS[STEP_COUNT - 1]; return MMAP_THRESHOLD_STEPS[step]; } // ============================================================================ // UCB1 State (per call-site) // ============================================================================ typedef struct { // Per-step statistics double avg_reward[STEP_COUNT]; // Average reward for each step uint64_t step_trials[STEP_COUNT]; // Trial count for each step // Current state MmapThresholdStep current_step; // Currently active step uint64_t total_trials; // Total trials across all steps // Hysteresis (safety mechanism) MmapThresholdStep candidate_step; // Candidate for next step uint32_t consecutive_count; // Consecutive improvements // Cooldown (stability mechanism) uint64_t last_adoption_time_ms; // Last time we adopted a new step } UCB1State; // ============================================================================ // Global UCB1 State // ============================================================================ static UCB1State g_ucb1_states[256]; // Per-site UCB1 state (simplified) static int g_evolution_enabled = 0; // 0 = baseline, 1 = evolving // ============================================================================ // KPI Measurement // ============================================================================ // Get current timestamp in milliseconds static uint64_t get_time_ms(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000; } // Normalize value to [0, 1] range static double normalize(double value, double min, double max) { double range = max - min; if (range < 0.001) return 0.5; // Avoid division by zero return (value - min) / range; } // Calculate reward from KPI // Lower is better (negative reward for high latency/PF/RSS) static double calculate_reward(const hak_kpi_t* kpi) { // Normalize KPIs to [0, 1] double norm_p99 = normalize((double)kpi->p99_alloc_ns, 0.0, 1000.0); // 0-1000ns double norm_pf = normalize((double)kpi->hard_page_faults, 0.0, 100.0); // 0-100 faults double norm_rss = normalize((double)kpi->rss_delta_mb, -10.0, 10.0); // -10MB ~ +10MB // Reward = minimize cost // P99 is most important (weight 1.0) // Page Faults are moderately important (weight 0.5) // RSS is less important (weight 0.2) double reward = -(norm_p99 + 0.5 * norm_pf + 0.2 * norm_rss); return reward; } // ============================================================================ // UCB1 Algorithm // ============================================================================ // Calculate UCB1 score for a step static double ucb1_score( const UCB1State* state, MmapThresholdStep step ) { // If never tried, return infinity (prioritize exploration) if (state->step_trials[step] == 0) { return INFINITY; } // UCB1 formula: avg_reward + √(C × ln(N) / n) // C = exploration factor (2.0) // N = total trials // n = trials for this step double avg_reward = state->avg_reward[step]; double exploration_bonus = sqrt( UCB1_EXPLORATION_FACTOR * log((double)state->total_trials) / (double)state->step_trials[step] ); return avg_reward + exploration_bonus; } // Select best step using UCB1 (only ±1 neighbors) static MmapThresholdStep ucb1_select(const UCB1State* state) { MmapThresholdStep current = state->current_step; MmapThresholdStep best_step = current; double best_score = ucb1_score(state, current); // Try previous step (if exists) if (current > 0) { MmapThresholdStep prev = (MmapThresholdStep)(current - 1); double score = ucb1_score(state, prev); if (score > best_score) { best_score = score; best_step = prev; } } // Try next step (if exists) if (current < STEP_COUNT - 1) { MmapThresholdStep next = (MmapThresholdStep)(current + 1); double score = ucb1_score(state, next); if (score > best_score) { best_score = score; best_step = next; } } return best_step; } // Update statistics for a step static void ucb1_update(UCB1State* state, MmapThresholdStep step, double reward) { // Update running average uint64_t n = state->step_trials[step]; if (n == 0) { state->avg_reward[step] = reward; } else { // Incremental average: avg_new = (avg_old × n + reward) / (n + 1) state->avg_reward[step] = (state->avg_reward[step] * n + reward) / (n + 1); } // Update counts state->step_trials[step]++; state->total_trials++; } // ============================================================================ // Hysteresis (Safety Mechanism) // ============================================================================ static int hysteresis_should_adopt( UCB1State* state, MmapThresholdStep new_step, double current_reward, double new_reward ) { // Calculate improvement percentage double improvement = (new_reward - current_reward) / fabs(current_reward); // Check if improvement is sufficient if (improvement >= HYSTERESIS_IMPROVE_PCT) { // Same candidate as before? if (state->candidate_step == new_step) { state->consecutive_count++; } else { // New candidate, reset counter state->candidate_step = new_step; state->consecutive_count = 1; } // Reached threshold? if (state->consecutive_count >= HYSTERESIS_CONSECUTIVE) { // Reset and adopt! state->consecutive_count = 0; state->candidate_step = (MmapThresholdStep)-1; // Invalid return 1; } } else { // Improvement insufficient, reset state->consecutive_count = 0; state->candidate_step = (MmapThresholdStep)-1; } return 0; } // ============================================================================ // Cooldown (Stability Mechanism) // ============================================================================ static int cooldown_can_adjust(const UCB1State* state) { if (state->last_adoption_time_ms == 0) { return 1; // First time, always OK } uint64_t now = get_time_ms(); uint64_t elapsed_ms = now - state->last_adoption_time_ms; uint64_t elapsed_secs = elapsed_ms / 1000; return elapsed_secs >= COOLDOWN_SECS; } static void cooldown_record_adoption(UCB1State* state) { state->last_adoption_time_ms = get_time_ms(); } // ============================================================================ // Evolution Cycle (Main Logic) // ============================================================================ void hak_trigger_evolution(void) { if (!g_evolution_enabled) { return; // Evolution disabled } printf("\n[UCB1] Evolution cycle triggered\n"); // For PoC, we only evolve the first active site // Real implementation would iterate all sites UCB1State* state = &g_ucb1_states[0]; // 1. Check cooldown if (!cooldown_can_adjust(state)) { printf("[UCB1] Cooldown active, skipping evolution\n"); return; } // 2. Measure current KPI hak_kpi_t kpi; hak_get_kpi(&kpi); // 3. Calculate reward double reward = calculate_reward(&kpi); printf("[UCB1] Current reward: %.3f (P99=%lu ns, PF=%lu)\n", reward, (unsigned long)kpi.p99_alloc_ns, (unsigned long)kpi.hard_page_faults); // 4. Update statistics for current step ucb1_update(state, state->current_step, reward); // 5. Select best step using UCB1 MmapThresholdStep best_step = ucb1_select(state); printf("[UCB1] UCB1 selected step: %d (current: %d)\n", best_step, state->current_step); // 6. Check if different from current if (best_step != state->current_step) { // Get current reward (average of current step) double current_reward = state->avg_reward[state->current_step]; double new_reward = state->avg_reward[best_step]; // 7. Hysteresis check if (hysteresis_should_adopt(state, best_step, current_reward, new_reward)) { printf("[UCB1] ✅ ADOPTING new step %d → %d (improvement: %.1f%%)\n", state->current_step, best_step, (new_reward - current_reward) / fabs(current_reward) * 100.0); state->current_step = best_step; cooldown_record_adoption(state); } else { printf("[UCB1] Hysteresis: need %d more consecutive improvements\n", HYSTERESIS_CONSECUTIVE - state->consecutive_count); } } printf("[UCB1] Current step: %d (%zu bytes)\n", state->current_step, step_to_bytes(state->current_step)); } // ============================================================================ // Public API // ============================================================================ void hak_enable_evolution(int enable) { g_evolution_enabled = enable; if (enable) { printf("[UCB1] Evolution ENABLED\n"); // Initialize UCB1 state (simplified: first site only) UCB1State* state = &g_ucb1_states[0]; memset(state, 0, sizeof(UCB1State)); state->current_step = STEP_256KB; // Start at 256KB (reasonable default) state->candidate_step = (MmapThresholdStep)-1; // Invalid } else { printf("[UCB1] Evolution DISABLED (baseline mode)\n"); } } // Get current step for a site (simplified: always site 0) MmapThresholdStep hak_ucb1_get_step(void) { if (!g_evolution_enabled) { return STEP_256KB; // Baseline default } return g_ucb1_states[0].current_step; } // Get step size in bytes size_t hak_ucb1_get_threshold(void) { MmapThresholdStep step = hak_ucb1_get_step(); return step_to_bytes(step); }