// smallobject_learner_v2.c // Phase v11a-2: Extended Learner for multi-dimensional MID v3.5 optimization #include #include #include #include #include "box/smallobject_learner_v2_box.h" #include "box/smallobject_stats_mid_v3_box.h" // ============================================================================ // Helper: Get timestamp // ============================================================================ static inline uint64_t get_timestamp_ns(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec; } static inline uint32_t get_timestamp_ms(void) { return (uint32_t)(get_timestamp_ns() / 1000000ULL); } // ============================================================================ // Global Learner State // ============================================================================ static SmallLearnerStatsV2 g_learner_v2_stats = {0}; static SmallLearnerClassStatsV2 g_learner_class_stats[8] = {0}; // Configuration static uint32_t g_c5_threshold_pct = SMALL_LEARNER_C5_THRESHOLD_PCT; static uint32_t g_eval_interval = SMALL_LEARNER_EVAL_INTERVAL; static uint32_t g_smoothing_factor = SMALL_LEARNER_SMOOTHING_FACTOR_PCT; static bool g_logging_enabled = false; // ============================================================================ // Event Recording // ============================================================================ void small_learner_v2_record_refill(uint32_t class_idx, uint64_t capacity) { if (class_idx >= 8) return; SmallLearnerStatsV2 *learn = &g_learner_v2_stats; SmallLearnerClassStatsV2 *cls = &g_learner_class_stats[class_idx]; learn->allocs[class_idx] += capacity; learn->total_allocations += capacity; cls->allocs += capacity; cls->sample_count++; cls->last_update_ns = get_timestamp_ns(); } void small_learner_v2_record_retire(uint32_t class_idx, uint32_t free_hit_ratio_bps) { if (class_idx >= 8) return; SmallLearnerStatsV2 *learn = &g_learner_v2_stats; SmallLearnerClassStatsV2 *cls = &g_learner_class_stats[class_idx]; learn->retire_count[class_idx]++; learn->total_retires++; // Exponential smoothing for retire ratio uint32_t alpha = g_smoothing_factor; // 0-100 uint32_t new_val = free_hit_ratio_bps / 100; // Convert to percentage if (cls->retire_ratio_smoothed == 0) { // First sample cls->retire_ratio_smoothed = new_val; } else { // EMA: smoothed = (1-alpha) * smoothed + alpha * new_val uint32_t smoothed = ((100 - alpha) * cls->retire_ratio_smoothed + alpha * new_val) / 100; cls->retire_ratio_smoothed = smoothed; } learn->retire_ratio_pct[class_idx] = cls->retire_ratio_smoothed; cls->sample_count++; cls->last_update_ns = get_timestamp_ns(); } void small_learner_v2_record_page_stats(const SmallPageStatsMID_v3 *stat) { if (!stat || stat->class_idx >= 8) return; SmallLearnerStatsV2 *learn = &g_learner_v2_stats; SmallLearnerClassStatsV2 *cls = &g_learner_class_stats[stat->class_idx]; // Record allocations learn->allocs[stat->class_idx] += stat->total_allocations; learn->total_allocations += stat->total_allocations; // Record retires learn->retire_count[stat->class_idx]++; learn->total_retires++; // Exponential smoothing for free hit ratio uint32_t alpha = g_smoothing_factor; uint32_t new_val = stat->free_hit_ratio_bps / 100; // Convert to percentage if (cls->retire_ratio_smoothed == 0) { cls->retire_ratio_smoothed = new_val; } else { uint32_t smoothed = ((100 - alpha) * cls->retire_ratio_smoothed + alpha * new_val) / 100; cls->retire_ratio_smoothed = smoothed; } learn->retire_ratio_pct[stat->class_idx] = cls->retire_ratio_smoothed; // Update global free hit ratio (EMA) if (learn->free_hit_ratio_bps == 0) { learn->free_hit_ratio_bps = stat->free_hit_ratio_bps; } else { uint32_t smoothed = ((100 - alpha) * learn->free_hit_ratio_bps + alpha * stat->free_hit_ratio_bps) / 100; learn->free_hit_ratio_bps = smoothed; } // Update class stats cls->allocs += stat->total_allocations; cls->sample_count++; cls->last_update_ns = get_timestamp_ns(); // Log if enabled if (g_logging_enabled) { fprintf(stderr, "[Learner_v2] C%u: allocs=%lu retire_ratio=%u%% free_hit=%u bps\n", stat->class_idx, stat->total_allocations, cls->retire_ratio_smoothed, stat->free_hit_ratio_bps); } } void small_learner_v2_ingest_stats(const SmallPageStatsAggregate_MID_v3 *agg) { if (!agg) return; SmallLearnerStatsV2 *learn = &g_learner_v2_stats; // Update from aggregated stats for (int i = 0; i < 8; i++) { learn->allocs[i] = agg->class_allocations[i]; learn->retire_count[i] = agg->class_retire_count[i]; // Update retire ratio from aggregate if (agg->class_retire_count[i] > 0) { uint32_t avg_ratio_pct = agg->class_avg_free_hit_bps[i] / 100; learn->retire_ratio_pct[i] = avg_ratio_pct; g_learner_class_stats[i].retire_ratio_smoothed = avg_ratio_pct; } } learn->total_allocations = agg->total_allocations; learn->total_retires = agg->total_pages_retired; learn->free_hit_ratio_bps = agg->global_avg_free_hit_bps; learn->sample_count = agg->eval_count; } // ============================================================================ // Evaluation & Decision Making // ============================================================================ void small_learner_v2_evaluate(void) { SmallLearnerStatsV2 *learn = &g_learner_v2_stats; learn->eval_count++; learn->last_eval_timestamp_ms = get_timestamp_ms(); // Calculate average page utilization if (learn->total_retires > 0) { uint64_t total_capacity = 0; uint64_t total_used = 0; for (int i = 0; i < 8; i++) { if (learn->retire_count[i] > 0) { // Estimate capacity based on retire ratio total_capacity += learn->allocs[i]; total_used += (learn->allocs[i] * learn->retire_ratio_pct[i]) / 100; } } if (total_capacity > 0) { learn->avg_page_utilization = (total_used * 10000) / total_capacity; } } if (g_logging_enabled) { fprintf(stderr, "[Learner_v2] Eval #%lu: total_allocs=%lu retires=%lu util=%lu bps\n", learn->eval_count, learn->total_allocations, learn->total_retires, learn->avg_page_utilization); } } const SmallLearnerStatsV2* small_learner_v2_stats_snapshot(void) { return &g_learner_v2_stats; } const SmallLearnerClassStatsV2* small_learner_v2_class_stats(uint32_t class_idx) { if (class_idx >= 8) return NULL; return &g_learner_class_stats[class_idx]; } // ============================================================================ // Routing Decision Support // ============================================================================ int small_learner_v2_should_use_v7(uint32_t class_idx) { (void)class_idx; // Unused in v11a-2 // Decision based on C5 ratio uint32_t c5_ratio = small_learner_v2_c5_ratio_pct(); if (c5_ratio >= g_c5_threshold_pct) { return 1; // Use v7 } return 0; // Use MID_v3 } uint32_t small_learner_v2_c5_ratio_pct(void) { SmallLearnerStatsV2 *learn = &g_learner_v2_stats; if (learn->total_allocations == 0) { return 0; } return (uint32_t)((learn->allocs[5] * 100) / learn->total_allocations); } uint32_t small_learner_v2_class_ratio_pct(uint32_t class_idx) { if (class_idx >= 8) return 0; SmallLearnerStatsV2 *learn = &g_learner_v2_stats; if (learn->total_allocations == 0) { return 0; } return (uint32_t)((learn->allocs[class_idx] * 100) / learn->total_allocations); } uint32_t small_learner_v2_retire_efficiency_pct(uint32_t class_idx) { if (class_idx >= 8) return 0; return g_learner_v2_stats.retire_ratio_pct[class_idx]; } // ============================================================================ // Configuration & Control // ============================================================================ bool small_learner_v2_enabled(void) { const char *env = getenv("HAKMEM_SMALL_LEARNER_V7_ENABLED"); return (env && *env && *env != '0'); } void small_learner_v2_set_c5_threshold_pct(uint32_t threshold) { g_c5_threshold_pct = threshold; } uint32_t small_learner_v2_get_c5_threshold_pct(void) { return g_c5_threshold_pct; } void small_learner_v2_set_eval_interval(uint32_t interval) { g_eval_interval = interval > 0 ? interval : SMALL_LEARNER_EVAL_INTERVAL; } void small_learner_v2_set_smoothing_factor(uint32_t factor_pct) { if (factor_pct > 100) factor_pct = 100; g_smoothing_factor = factor_pct; } void small_learner_v2_set_logging_enabled(bool enabled) { g_logging_enabled = enabled; } void small_learner_v2_reset(void) { memset(&g_learner_v2_stats, 0, sizeof(g_learner_v2_stats)); memset(&g_learner_class_stats, 0, sizeof(g_learner_class_stats)); g_c5_threshold_pct = SMALL_LEARNER_C5_THRESHOLD_PCT; g_eval_interval = SMALL_LEARNER_EVAL_INTERVAL; g_smoothing_factor = SMALL_LEARNER_SMOOTHING_FACTOR_PCT; } // ============================================================================ // Debugging & Monitoring // ============================================================================ void small_learner_v2_print_stats(void) { const SmallLearnerStatsV2 *learn = &g_learner_v2_stats; fprintf(stderr, "[Learner_v2] Statistics:\n"); fprintf(stderr, " total_allocations=%lu total_retires=%lu\n", learn->total_allocations, learn->total_retires); fprintf(stderr, " avg_page_util=%lu bps (%.2f%%) free_hit=%u bps\n", learn->avg_page_utilization, learn->avg_page_utilization / 100.0, learn->free_hit_ratio_bps); fprintf(stderr, " eval_count=%lu sample_count=%lu\n", learn->eval_count, learn->sample_count); for (int i = 0; i < 8; i++) { if (learn->allocs[i] > 0) { fprintf(stderr, " C%d: allocs=%lu retires=%u ratio=%u%%\n", i, learn->allocs[i], learn->retire_count[i], learn->retire_ratio_pct[i]); } } } void small_learner_v2_print_decisions(void) { fprintf(stderr, "[Learner_v2] Routing Decisions:\n"); fprintf(stderr, " C5 threshold=%u%% current_ratio=%u%%\n", g_c5_threshold_pct, small_learner_v2_c5_ratio_pct()); for (uint32_t i = 5; i <= 7; i++) { int use_v7 = small_learner_v2_should_use_v7(i); fprintf(stderr, " C%u: %s (ratio=%u%% efficiency=%u%%)\n", i, use_v7 ? "v7" : "MID_v3", small_learner_v2_class_ratio_pct(i), small_learner_v2_retire_efficiency_pct(i)); } }