// Box: ACE (Adaptive Control Engine) // Purpose: Dynamic SuperSlab size adaptation based on allocation patterns #include "ss_ace_box.h" #include "hakmem_super_registry.h" #include "hakmem_tiny_config.h" #include #include // ============================================================================ // ACE State (Global) // ============================================================================ SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}}; // Runtime override for ACE target_lg (ENV: HAKMEM_TINY_SS_FORCE_LG) static int g_ss_force_lg = -1; // ======================================================================== // ACE Threshold Profiles (Demote/Promote Utilization) // ======================================================================== typedef struct { double demote_util; // Utilization threshold for 2MB→1MB demotion double promote_util; // Utilization threshold for 1MB→2MB promotion } AceProfile; // Profile 0: Conservative (original) // - Demote when util < 35% (2MB→1MB) // - Promote when util > 75% (1MB→2MB) // Profile 1: Slightly more aggressive demotion // - Demote when util < 40% (2MB→1MB) // - Promote when util > 75% // Profile 2: Easier promotion (keep 2MB more often) ★ DEFAULT // - Demote when util < 35% // - Promote when util > 70% // - Best performance for 256B workload (+3.0% vs Profile 0) static const AceProfile g_ace_profiles[] = { {0.35, 0.75}, {0.40, 0.75}, {0.35, 0.70}, // DEFAULT: Profile 2 }; #define ACE_PROFILE_COUNT (int)(sizeof(g_ace_profiles) / sizeof(g_ace_profiles[0])) static _Atomic int g_ace_profile_idx = 2; // DEFAULT: Profile 2 (easier promotion) static const AceProfile* ace_current_profile(void) { static int env_parsed = 0; if (!env_parsed) { const char* env = getenv("HAKMEM_ACE_PROFILE"); if (env && *env) { int idx = atoi(env); if (idx >= 0 && idx < ACE_PROFILE_COUNT) { atomic_store_explicit(&g_ace_profile_idx, idx, memory_order_relaxed); } } env_parsed = 1; } int idx = atomic_load_explicit(&g_ace_profile_idx, memory_order_relaxed); if (idx < 0 || idx >= ACE_PROFILE_COUNT) { idx = 0; } return &g_ace_profiles[idx]; } void hak_tiny_superslab_ace_set_profile(int idx) { if (idx < 0 || idx >= ACE_PROFILE_COUNT) { return; } atomic_store_explicit(&g_ace_profile_idx, idx, memory_order_relaxed); } // ============================================================================ // ACE-Aware Size Selection // ============================================================================ // Decide next SuperSlab lg for a class (ACE-aware, clamped) static inline uint8_t hak_tiny_superslab_next_lg(int class_idx) { if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) { return SUPERSLAB_LG_DEFAULT; } // Prefer ACE target if within allowed range uint8_t t = atomic_load_explicit((_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg, memory_order_relaxed); if (t < SUPERSLAB_LG_MIN || t > SUPERSLAB_LG_MAX) { return SUPERSLAB_LG_DEFAULT; } return t; } // ============================================================================ // ACE Tick Function (Promotion/Demotion Logic) // ============================================================================ #define ACE_TICK_NS (150ULL * 1000 * 1000) // 150ms tick interval #define ACE_COOLDOWN_NS (800ULL * 1000 * 1000) // 0.8s cooldown (anti-oscillation) // Simplified thresholds for refill activity #define HI_REFILL(k) (g_ss_ace[k].refill_count > 64) // High refill rate #define MID_REFILL(k) (g_ss_ace[k].refill_count > 16) // Medium refill rate // Object sizes per class (for capacity calculation) // Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64}; void hak_tiny_superslab_ace_tick(int k, uint64_t now) { if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; SuperSlabACEState* c = &g_ss_ace[k]; // Rate limiting: only tick every ACE_TICK_NS (~150ms) if (now - c->last_tick_ns < ACE_TICK_NS) return; // Calculate capacity for 1MB and 2MB SuperSlabs int obj_size = g_tiny_obj_sizes[k]; double cap1MB = (double)((1U << 20) / obj_size); // 1MB capacity double cap2MB = (double)((1U << 21) / obj_size); // 2MB capacity // Calculate hotness score (weighted: 60% live blocks, 40% refill rate) double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count; if (hot < 0) hot = 0; if (hot > 1000) hot = 1000; c->hot_score = (uint16_t)hot; // Cooldown mechanism: prevent size changes within 0.8s of last change static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0}; if (now - last_switch_ns[k] >= ACE_COOLDOWN_NS) { if (c->current_lg <= 20) { // Promotion condition: 1MB → 2MB // High demand (live > 75% capacity) AND high refill rate if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) { c->target_lg = 21; // Promote to 2MB last_switch_ns[k] = now; } } else { // Demotion condition: 2MB → 1MB (C6/C7 optimized - aggressive demote) // Low demand (live < 50% capacity) AND not high refill rate if (c->live_blocks < 0.50 * cap2MB && !HI_REFILL(k)) { c->target_lg = 20; // Demote to 1MB last_switch_ns[k] = now; } } } // EMA-style decay for counters (reduce by 75% each tick) c->alloc_count = c->alloc_count / 4; c->refill_count = c->refill_count / 4; c->spill_count = c->spill_count / 4; // live_blocks is updated incrementally by alloc/free, not decayed here c->last_tick_ns = now; } // ============================================================================ // ACE Observer (Registry-based, zero hot-path overhead) // ============================================================================ // Global debug flag (set once at initialization) static int g_ace_debug = 0; // Registry-based observation: scan all SuperSlabs for usage stats static void ace_observe_and_decide(int k) { if (k < 0 || k >= TINY_NUM_CLASSES_SS) return; SuperSlabACEState* c = &g_ss_ace[k]; // Scan Registry to count SuperSlabs and total live blocks int ss_count = 0; uint32_t total_live = 0; for (int i = 0; i < SUPER_REG_SIZE; i++) { SuperRegEntry* e = &g_super_reg[i]; // Atomic read (thread-safe) uintptr_t base = atomic_load_explicit( (_Atomic uintptr_t*)&e->base, memory_order_acquire); if (base == 0) continue; // Empty slot // Phase 8.4: Safety check - skip if ss pointer is invalid if (!e->ss) continue; // Phase 12: per-SS size_class removed; registry entries are per-class by construction. ss_count++; // Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead) uint32_t ss_live = 0; int cap_scan = ss_slabs_capacity(e->ss); for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) { TinySlabMeta* meta = &e->ss->slabs[slab_idx]; // Relaxed read is OK (stats only, no hot-path impact) ss_live += meta->used; } total_live += ss_live; } // Calculate utilization int obj_size = g_tiny_obj_sizes[k]; uint8_t current_lg = atomic_load_explicit( (_Atomic uint8_t*)&c->current_lg, memory_order_relaxed); uint32_t capacity = (ss_count > 0) ? ss_count * ((1U << current_lg) / obj_size) : 1; double util = (double)total_live / capacity; // Update hot_score (for debugging/visualization) c->hot_score = (uint16_t)(util * 1000); if (c->hot_score > 1000) c->hot_score = 1000; // Promotion/Demotion decision uint8_t new_target = current_lg; const AceProfile* prof = ace_current_profile(); if (current_lg <= 20) { // Promotion: 1MB → 2MB if (util > prof->promote_util) { new_target = 21; } } else { // Demotion: 2MB → 1MB if (util < prof->demote_util) { new_target = 20; } } // Debug output (if enabled) if (g_ace_debug && ss_count > 0) { fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n", k, obj_size, ss_count, total_live, capacity, util * 100.0, current_lg, new_target, c->hot_score); } // Atomic write (thread-safe): target と current を同期させる if (new_target != current_lg) { atomic_store_explicit( (_Atomic uint8_t*)&c->target_lg, new_target, memory_order_release); atomic_store_explicit( (_Atomic uint8_t*)&c->current_lg, new_target, memory_order_release); if (g_ace_debug) { fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n", k, 1 << (current_lg - 20), 1 << (new_target - 20), util * 100.0); } } } // Called from Learner thread (background observation) void hak_tiny_superslab_ace_observe_all(void) { // Initialize debug flag once static int initialized = 0; if (!initialized) { const char* ace_debug = getenv("HAKMEM_ACE_DEBUG"); g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0; initialized = 1; } for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) { ace_observe_and_decide(k); } } // ============================================================================ // ACE Statistics // ============================================================================ void superslab_ace_print_stats(void) { printf("=== ACE (Adaptive Control Engine) Stats ===\n"); const char* class_names[8] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"}; printf("Class Curr Targ Hot Allocs Refills Spills LiveBlks\n"); printf("--------------------------------------------------------------\n"); for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) { SuperSlabACEState* c = &g_ss_ace[i]; printf("%-6s %2uMB %2uMB %4u %7u %8u %7u %9u\n", class_names[i], (1u << c->current_lg) / (1024 * 1024), (1u << c->target_lg) / (1024 * 1024), c->hot_score, c->alloc_count, c->refill_count, c->spill_count, c->live_blocks); } printf("\n"); }