Files
hakmem/core/hakmem_ucb1.c
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

335 lines
11 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_ucb1.c - UCB1 Bandit Implementation
// Purpose: Automatic policy evolution via reinforcement learning
//
// License: MIT
// Date: 2025-10-21
#include "hakmem.h"
#include <math.h>
#include <string.h>
#include <time.h>
#include <stdio.h>
// ============================================================================
// Configuration
// ============================================================================
// Exploration constant C used by ucb1_score(): the bonus added to a step's
// average reward is sqrt(C * ln(N) / n), with N = total trials and
// n = trials of that step.
#define UCB1_EXPLORATION_FACTOR 2.0
// Minimum relative gain, (new - current) / |current|, that
// hysteresis_should_adopt() counts as an improvement (0.08 = 8%).
#define HYSTERESIS_IMPROVE_PCT 0.08
// Number of qualifying evaluations of the same candidate step required
// before hysteresis_should_adopt() reports that it should be adopted.
#define HYSTERESIS_CONSECUTIVE 3
// Minimum number of seconds between two adoptions, enforced by
// cooldown_can_adjust() (180 s = 3 minutes).
#define COOLDOWN_SECS 180
// ============================================================================
// Discrete Step Tables
// ============================================================================
// Candidate mmap_threshold values in bytes, indexed by MmapThresholdStep.
// Six entries are listed; each one doubles the previous, from 64 KiB to 2 MiB.
static const size_t MMAP_THRESHOLD_STEPS[STEP_COUNT] = {
    (size_t)64 << 10,   //  64 KiB
    (size_t)128 << 10,  // 128 KiB
    (size_t)256 << 10,  // 256 KiB
    (size_t)512 << 10,  // 512 KiB
    (size_t)1 << 20,    //   1 MiB
    (size_t)2 << 20,    //   2 MiB
};
// Translate a step enum value into its mmap threshold in bytes.
//
// Any value outside [0, STEP_COUNT) is clamped to the largest threshold.
// The comparison is done on the unsigned representation: whether an enum is
// signed is implementation-defined, and this file uses (MmapThresholdStep)-1
// as an "invalid" marker, which must never be used as a negative array index.
static inline size_t step_to_bytes(MmapThresholdStep step) {
    if ((unsigned int)step >= (unsigned int)STEP_COUNT) {
        return MMAP_THRESHOLD_STEPS[STEP_COUNT - 1];
    }
    return MMAP_THRESHOLD_STEPS[step];
}
// ============================================================================
// UCB1 State (per call-site)
// ============================================================================
// Bandit state for one call-site: per-step reward statistics plus the
// hysteresis and cooldown bookkeeping used by hak_trigger_evolution().
typedef struct {
// Per-step statistics, maintained by ucb1_update()
double avg_reward[STEP_COUNT]; // Running mean of the rewards recorded for each step
uint64_t step_trials[STEP_COUNT]; // Number of rewards recorded for each step
// Current state
MmapThresholdStep current_step; // Step currently in effect (returned by hak_ucb1_get_step() while evolving)
uint64_t total_trials; // Number of rewards recorded across all steps
// Hysteresis (safety mechanism), maintained by hysteresis_should_adopt()
MmapThresholdStep candidate_step; // Step whose streak is being counted; (MmapThresholdStep)-1 when there is none
uint32_t consecutive_count; // Length of the candidate's current streak of sufficient improvements
// Cooldown (stability mechanism)
uint64_t last_adoption_time_ms; // get_time_ms() value at the last adoption; 0 means no adoption yet
} UCB1State;
// ============================================================================
// Global UCB1 State
// ============================================================================
// UCB1 state table with 256 slots. The code visible in this file only ever
// reads and writes slot 0 (simplified single-site PoC).
static UCB1State g_ucb1_states[256];
// Evolution switch, written by hak_enable_evolution():
// 0 = baseline (hak_ucb1_get_step() returns STEP_256KB), non-zero = evolving.
static int g_evolution_enabled = 0;
// ============================================================================
// KPI Measurement
// ============================================================================
// Read CLOCK_MONOTONIC and return it as whole milliseconds.
// The sub-millisecond part of the nanosecond field is truncated.
static uint64_t get_time_ms(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const uint64_t ms_from_secs = (uint64_t)now.tv_sec * 1000;
    const uint64_t ms_from_nanos = (uint64_t)now.tv_nsec / 1000000;
    return ms_from_secs + ms_from_nanos;
}
// Linearly rescale `value` so that `min` maps to 0 and `max` maps to 1.
// The result is NOT clamped: inputs outside [min, max] produce values
// below 0 or above 1. When the span (max - min) is smaller than 0.001
// (including negative spans) the midpoint 0.5 is returned instead, which
// also avoids dividing by zero.
static double normalize(double value, double min, double max) {
    const double span = max - min;
    return (span < 0.001) ? 0.5 : (value - min) / span;
}
// Turn a KPI sample into a scalar reward (higher is better).
//
// Each KPI is rescaled with normalize() and combined into a weighted cost;
// the reward is the negated cost, so higher latency, more hard page faults
// or a larger RSS delta all lower the reward.
//   p99_alloc_ns     : scaled against 0..1000 ns,   weight 1.0
//   hard_page_faults : scaled against 0..100,       weight 0.5
//   rss_delta_mb     : scaled against -10..+10 MB,  weight 0.2
static double calculate_reward(const hak_kpi_t* kpi) {
    const double latency_term = normalize((double)kpi->p99_alloc_ns, 0.0, 1000.0);
    const double fault_term = normalize((double)kpi->hard_page_faults, 0.0, 100.0);
    const double rss_term = normalize((double)kpi->rss_delta_mb, -10.0, 10.0);

    const double cost = latency_term + 0.5 * fault_term + 0.2 * rss_term;
    return -cost;
}
// ============================================================================
// UCB1 Algorithm
// ============================================================================
// UCB1 score of `step`: avg_reward + sqrt(C * ln(N) / n), where
//   C = UCB1_EXPLORATION_FACTOR,
//   N = state->total_trials,
//   n = state->step_trials[step].
// A step that has never been tried scores INFINITY so that it is always
// preferred over tried steps.
static double ucb1_score(const UCB1State* state, MmapThresholdStep step) {
    const uint64_t pulls = state->step_trials[step];
    if (pulls == 0) {
        return INFINITY;
    }

    const double log_total = log((double)state->total_trials);
    const double bonus = sqrt(UCB1_EXPLORATION_FACTOR * log_total / (double)pulls);
    return state->avg_reward[step] + bonus;
}
// Select best step using UCB1 (only ±1 neighbors)
static MmapThresholdStep ucb1_select(const UCB1State* state) {
MmapThresholdStep current = state->current_step;
MmapThresholdStep best_step = current;
double best_score = ucb1_score(state, current);
// Try previous step (if exists)
if (current > 0) {
MmapThresholdStep prev = (MmapThresholdStep)(current - 1);
double score = ucb1_score(state, prev);
if (score > best_score) {
best_score = score;
best_step = prev;
}
}
// Try next step (if exists)
if (current < STEP_COUNT - 1) {
MmapThresholdStep next = (MmapThresholdStep)(current + 1);
double score = ucb1_score(state, next);
if (score > best_score) {
best_score = score;
best_step = next;
}
}
return best_step;
}
// Record one observed `reward` for `step`: fold it into that step's running
// mean and bump both the per-step and the total trial counters.
static void ucb1_update(UCB1State* state, MmapThresholdStep step, double reward) {
    const uint64_t seen = state->step_trials[step];
    double* mean = &state->avg_reward[step];

    // First sample becomes the mean; afterwards
    // mean_new = (mean_old * seen + reward) / (seen + 1).
    *mean = (seen == 0) ? reward : (*mean * seen + reward) / (seen + 1);

    state->step_trials[step] = seen + 1;
    state->total_trials += 1;
}
// ============================================================================
// Hysteresis (Safety Mechanism)
// ============================================================================
// Decide whether `new_step` has earned adoption.
//
// An evaluation qualifies when the relative gain
// (new_reward - current_reward) / |current_reward| is at least
// HYSTERESIS_IMPROVE_PCT. Qualifying evaluations of the same candidate are
// counted in state->consecutive_count; a different candidate restarts the
// count at 1 and a non-qualifying evaluation clears it.
//
// Returns 1 (and clears the streak) once the count reaches
// HYSTERESIS_CONSECUTIVE, otherwise 0.
static int hysteresis_should_adopt(
    UCB1State* state,
    MmapThresholdStep new_step,
    double current_reward,
    double new_reward
) {
    const double gain = new_reward - current_reward;
    const double baseline = fabs(current_reward);

    // A zero baseline would make the ratio a division by zero. Decide on the
    // sign of the gain instead: any strictly positive gain over a zero
    // baseline qualifies, anything else does not.
    int improved;
    if (baseline > 0.0) {
        improved = (gain / baseline) >= HYSTERESIS_IMPROVE_PCT;
    } else {
        improved = gain > 0.0;
    }

    if (!improved) {
        // Improvement insufficient: drop any streak in progress.
        state->consecutive_count = 0;
        state->candidate_step = (MmapThresholdStep)-1;
        return 0;
    }

    if (state->candidate_step == new_step) {
        state->consecutive_count++;
    } else {
        // Different candidate than last time: start a new streak.
        state->candidate_step = new_step;
        state->consecutive_count = 1;
    }

    if (state->consecutive_count < HYSTERESIS_CONSECUTIVE) {
        return 0;
    }

    // Streak complete: clear the bookkeeping and signal adoption.
    state->consecutive_count = 0;
    state->candidate_step = (MmapThresholdStep)-1;
    return 1;
}
// ============================================================================
// Cooldown (Stability Mechanism)
// ============================================================================
// Return non-zero when a new adoption is allowed: either no adoption has been
// recorded yet (timestamp 0) or at least COOLDOWN_SECS whole seconds have
// elapsed since the last one.
static int cooldown_can_adjust(const UCB1State* state) {
    const uint64_t adopted_at_ms = state->last_adoption_time_ms;
    if (adopted_at_ms == 0) {
        return 1;
    }

    const uint64_t waited_secs = (get_time_ms() - adopted_at_ms) / 1000;
    return waited_secs >= COOLDOWN_SECS;
}
// Stamp the state with the current get_time_ms() value, starting a new
// cooldown period.
static void cooldown_record_adoption(UCB1State* state) {
    const uint64_t now_ms = get_time_ms();
    state->last_adoption_time_ms = now_ms;
}
// ============================================================================
// Evolution Cycle (Main Logic)
// ============================================================================
void hak_trigger_evolution(void) {
if (!g_evolution_enabled) {
return; // Evolution disabled
}
printf("\n[UCB1] Evolution cycle triggered\n");
// For PoC, we only evolve the first active site
// Real implementation would iterate all sites
UCB1State* state = &g_ucb1_states[0];
// 1. Check cooldown
if (!cooldown_can_adjust(state)) {
printf("[UCB1] Cooldown active, skipping evolution\n");
return;
}
// 2. Measure current KPI
hak_kpi_t kpi;
hak_get_kpi(&kpi);
// 3. Calculate reward
double reward = calculate_reward(&kpi);
printf("[UCB1] Current reward: %.3f (P99=%lu ns, PF=%lu)\n",
reward,
(unsigned long)kpi.p99_alloc_ns,
(unsigned long)kpi.hard_page_faults);
// 4. Update statistics for current step
ucb1_update(state, state->current_step, reward);
// 5. Select best step using UCB1
MmapThresholdStep best_step = ucb1_select(state);
printf("[UCB1] UCB1 selected step: %d (current: %d)\n",
best_step, state->current_step);
// 6. Check if different from current
if (best_step != state->current_step) {
// Get current reward (average of current step)
double current_reward = state->avg_reward[state->current_step];
double new_reward = state->avg_reward[best_step];
// 7. Hysteresis check
if (hysteresis_should_adopt(state, best_step, current_reward, new_reward)) {
printf("[UCB1] ✅ ADOPTING new step %d → %d (improvement: %.1f%%)\n",
state->current_step, best_step,
(new_reward - current_reward) / fabs(current_reward) * 100.0);
state->current_step = best_step;
cooldown_record_adoption(state);
} else {
printf("[UCB1] Hysteresis: need %d more consecutive improvements\n",
HYSTERESIS_CONSECUTIVE - state->consecutive_count);
}
}
printf("[UCB1] Current step: %d (%zu bytes)\n",
state->current_step, step_to_bytes(state->current_step));
}
// ============================================================================
// Public API
// ============================================================================
// Switch evolution on (non-zero) or off (0) and log the new mode to stdout.
// Enabling also resets site 0's UCB1 state: all statistics are zeroed, the
// active step becomes STEP_256KB and no hysteresis candidate is set.
void hak_enable_evolution(int enable) {
    g_evolution_enabled = enable;

    if (!enable) {
        printf("[UCB1] Evolution DISABLED (baseline mode)\n");
        return;
    }

    printf("[UCB1] Evolution ENABLED\n");

    // Simplified: only the first site is initialized.
    UCB1State* site0 = &g_ucb1_states[0];
    memset(site0, 0, sizeof *site0);
    site0->current_step = STEP_256KB;              // Start at 256KB
    site0->candidate_step = (MmapThresholdStep)-1; // No candidate yet
}
// Return the step currently in effect (simplified: always site 0).
// While evolution is disabled this is the fixed baseline STEP_256KB.
MmapThresholdStep hak_ucb1_get_step(void) {
    return g_evolution_enabled ? g_ucb1_states[0].current_step : STEP_256KB;
}
// Return the mmap threshold, in bytes, of the step currently in effect.
size_t hak_ucb1_get_threshold(void) {
    return step_to_bytes(hak_ucb1_get_step());
}