Files
hakmem/core/hakmem_ace_controller.c

282 lines
9.0 KiB
C
Raw Permalink Normal View History

/* hakmem_ace_controller.c - ACE Learning Controller Implementation */
#include "hakmem_ace_controller.h"
#include "hakmem_tiny_magazine.h" // For hkm_ace_set_tls_capacity()
#include "hakmem_tiny.h" // For hkm_ace_set_drain_threshold()
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* ========== Environment variable helpers ========== */
/*
 * Read an int from the environment variable `name`.
 *
 * Returns `default_value` when the variable is unset, does not start with a
 * decimal integer, or holds a value outside the range of int. Characters
 * following a valid leading integer are ignored (as atoi() would).
 */
static int getenv_int(const char *name, int default_value) {
    const char *val = getenv(name);
    if (val == NULL) {
        return default_value;
    }

    /* strtol reports "no digits" via end == val and overflow via ERANGE;
     * atoi can signal neither and is undefined on out-of-range input. */
    char *end = NULL;
    errno = 0;
    long parsed = strtol(val, &end, 10);
    if (end == val || errno == ERANGE) {
        return default_value;
    }
    if (parsed < INT_MIN || parsed > INT_MAX) {
        return default_value;
    }
    return (int)parsed;
}
/*
 * Read an unsigned 64-bit value from the environment variable `name`.
 *
 * Returns `default_value` when the variable is unset, is negative, does not
 * start with a decimal integer, or overflows. Characters following a valid
 * leading integer are ignored.
 */
static uint64_t getenv_uint64(const char *name, uint64_t default_value) {
    const char *val = getenv(name);
    if (val == NULL) {
        return default_value;
    }

    /* strtoull silently negates "-N", so reject a leading minus sign after
     * skipping the same leading whitespace strtoull itself would skip. */
    const char *first = val + strspn(val, " \t\n\v\f\r");
    if (*first == '-') {
        return default_value;
    }

    char *end = NULL;
    errno = 0;
    unsigned long long parsed = strtoull(first, &end, 10);
    if (end == first || errno == ERANGE) {
        return default_value;
    }
    return (uint64_t)parsed;
}
/* ========== Logging macros ========== */
/*
 * ACE_LOG_INFO / ACE_LOG_DEBUG print one line to stderr, prefixed with
 * "[ACE] " or "[ACE DEBUG] ", when the matching hkm_ace_log_*_enabled(ctrl)
 * check passes. When HAKMEM_BUILD_RELEASE is non-zero both macros expand to
 * a statement that only evaluates `ctrl`; the format string and its
 * arguments are discarded.
 */
#if HAKMEM_BUILD_RELEASE

#define ACE_LOG_INFO(ctrl, fmt, ...)  do { (void)(ctrl); } while (0)
#define ACE_LOG_DEBUG(ctrl, fmt, ...) do { (void)(ctrl); } while (0)

#else

#define ACE_LOG_INFO(ctrl, fmt, ...)                                   \
    do {                                                               \
        if (hkm_ace_log_info_enabled(ctrl)) {                          \
            fprintf(stderr, "[ACE] " fmt "\n", ##__VA_ARGS__);         \
        }                                                              \
    } while (0)

#define ACE_LOG_DEBUG(ctrl, fmt, ...)                                  \
    do {                                                               \
        if (hkm_ace_log_debug_enabled(ctrl)) {                         \
            fprintf(stderr, "[ACE DEBUG] " fmt "\n", ##__VA_ARGS__);   \
        }                                                              \
    } while (0)

#endif
/* ========== Initialization ========== */
/*
 * Initialize an ACE controller.
 *
 * Zeroes *ctrl, then loads the settings from the environment:
 *   HAKMEM_ACE_ENABLED           (default 0)
 *   HAKMEM_ACE_FAST_INTERVAL_MS  (default 500)
 *   HAKMEM_ACE_SLOW_INTERVAL_MS  (default 30000)
 *   HAKMEM_ACE_LOG_LEVEL         (default 1)
 *
 * When ACE is disabled the function returns right after loading the
 * settings. Otherwise it initializes the metrics, sets up the TLS-capacity
 * and drain-threshold UCB1 bandits for each of the 8 classes, seeds the
 * per-class knob values, and stamps both tick timestamps with the current
 * monotonic time.
 *
 * A NULL ctrl is ignored, matching the other controller entry points.
 */
void hkm_ace_controller_init(struct hkm_ace_controller *ctrl) {
    if (!ctrl) {
        return;
    }
    memset(ctrl, 0, sizeof(*ctrl));

    /* Load configuration from environment variables. */
    ctrl->enabled = getenv_int("HAKMEM_ACE_ENABLED", 0);
    ctrl->fast_interval_ms = getenv_uint64("HAKMEM_ACE_FAST_INTERVAL_MS", 500);
    ctrl->slow_interval_ms = getenv_uint64("HAKMEM_ACE_SLOW_INTERVAL_MS", 30000);
    ctrl->log_level = getenv_int("HAKMEM_ACE_LOG_LEVEL", 1);

    if (!ctrl->enabled) {
        ACE_LOG_INFO(ctrl, "ACE disabled (HAKMEM_ACE_ENABLED=0)");
        return;
    }

    /* The intervals are 64-bit values: print them with PRIu64 rather than
     * %lu, which is only correct where unsigned long happens to be 64 bits. */
    ACE_LOG_INFO(ctrl, "ACE initializing...");
    ACE_LOG_INFO(ctrl, " Fast interval: %" PRIu64 " ms", (uint64_t)ctrl->fast_interval_ms);
    ACE_LOG_INFO(ctrl, " Slow interval: %" PRIu64 " ms", (uint64_t)ctrl->slow_interval_ms);
    ACE_LOG_INFO(ctrl, " Log level: %d", ctrl->log_level);

    /* Initialize the metrics. */
    hkm_ace_metrics_init();

    /* Initialize the UCB1 bandits, one pair per class. */
    for (int c = 0; c < 8; c++) {
        /* TLS capacity learning. */
        hkm_ucb1_init(&ctrl->tls_cap_bandit[c],
                      TLS_CAP_CANDIDATES,
                      TLS_CAP_N_CANDIDATES,
                      1.414); /* sqrt(2) */

        /* Drain threshold learning. */
        hkm_ucb1_init(&ctrl->drain_bandit[c],
                      DRAIN_THRESH_CANDIDATES,
                      DRAIN_THRESH_N_CANDIDATES,
                      1.414);

        /* Starting (default) knob values. */
        ctrl->tls_capacity[c] = 128;     /* default TLS capacity */
        ctrl->drain_threshold[c] = 1024; /* default drain threshold */
    }

    /* Initialize the tick timestamps to "now" (monotonic, milliseconds). */
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    uint64_t now_ms = (uint64_t)ts.tv_sec * 1000ULL + (uint64_t)ts.tv_nsec / 1000000ULL;
    ctrl->last_fast_tick_ms = now_ms;
    ctrl->last_slow_tick_ms = now_ms;

    ACE_LOG_INFO(ctrl, "ACE initialized successfully");
}
/* ========== Teardown ========== */
/*
 * Shut the controller down.
 *
 * Does nothing for a NULL or disabled controller. Otherwise stops the
 * background thread if it is marked as running and then calls
 * hkm_ace_metrics_destroy().
 */
void hkm_ace_controller_destroy(struct hkm_ace_controller *ctrl) {
    if (ctrl == NULL) {
        return;
    }
    if (!ctrl->enabled) {
        return;
    }

    ACE_LOG_INFO(ctrl, "ACE shutting down...");

    /* Stop the background thread first. */
    if (ctrl->running) {
        hkm_ace_controller_stop(ctrl);
    }

    /* Tear down the metrics. */
    hkm_ace_metrics_destroy();

    ACE_LOG_INFO(ctrl, "ACE shut down complete");
}
/* ========== Reward computation ========== */
/*
 * Collapse a metrics snapshot into a single reward value.
 *
 *   reward  = throughput - penalty
 *   penalty = LLC miss + mutex wait + backlog
 *
 * Scaling, as applied below:
 *   throughput : throughput_ops / 1e6 (millions of ops)
 *   LLC miss   : llc_miss_rate * 10   (even 0.1 is considered quite bad)
 *   mutex wait : mutex_wait_ns / 1e7  (1 ms of waiting costs 0.1)
 *   backlog    : sum of remote_free_backlog over the 8 classes / 1e4
 */
static double compute_reward(const struct hkm_ace_metrics *metrics) {
    enum { ACE_REWARD_CLASSES = 8 };

    /* Total the remote-free backlog across all classes. */
    uint64_t backlog_sum = 0;
    int cls = 0;
    while (cls < ACE_REWARD_CLASSES) {
        backlog_sum += metrics->remote_free_backlog[cls];
        cls++;
    }

    const double throughput_gain = (double)metrics->throughput_ops / 1000000.0;
    const double llc_cost = metrics->llc_miss_rate * 10.0;
    const double mutex_cost = (double)metrics->mutex_wait_ns / 10000000.0;
    const double backlog_cost = (double)backlog_sum / 10000.0;

    return throughput_gain - llc_cost - mutex_cost - backlog_cost;
}
/* ========== Fast Loop ========== */
/*
 * Run one fast-loop iteration.
 *
 * Collects the fast metrics into ctrl->current, turns them into a scalar
 * reward, and then, for each of the 8 classes, lets the TLS-capacity and
 * drain-threshold UCB1 bandits pick a value. A value that differs from the
 * current one is stored and pushed to the Tiny Pool; the reward is then fed
 * back to the arm that was just selected.
 *
 * ctrl is dereferenced unconditionally and must be non-NULL.
 *
 * NOTE(review): the reward is computed from metrics collected before the
 * newly selected arm is applied, yet it is credited to that arm. Confirm
 * this attribution is intended.
 */
void hkm_ace_controller_fast_loop(struct hkm_ace_controller *ctrl) {
    /* Collect metrics. */
    hkm_ace_metrics_collect_fast(&ctrl->current);

    /* Compute the reward. */
    double reward = compute_reward(&ctrl->current);

    /* 64-bit counters are printed with PRIu64; %lu is only correct where
     * unsigned long is 64 bits wide. */
    ACE_LOG_DEBUG(ctrl, "Fast loop: throughput=%" PRIu64 " ops/s, llc_miss=%.3f, mutex_wait=%" PRIu64 " ns, reward=%.3f",
                  (uint64_t)ctrl->current.throughput_ops,
                  ctrl->current.llc_miss_rate,
                  (uint64_t)ctrl->current.mutex_wait_ns,
                  reward);

    /* Per class: UCB1 learning plus knob adjustment. */
    for (int c = 0; c < 8; c++) {
        /* TLS capacity adjustment. */
        int arm_idx = hkm_ucb1_select(&ctrl->tls_cap_bandit[c]);
        uint32_t new_cap = hkm_ucb1_get_value(&ctrl->tls_cap_bandit[c], arm_idx);
        if (new_cap != ctrl->tls_capacity[c]) {
            uint32_t old_cap = ctrl->tls_capacity[c];
            ctrl->tls_capacity[c] = new_cap;
            /* Apply to Tiny Pool (NEW Phase ACE) */
            hkm_ace_set_tls_capacity(c, new_cap);
            ACE_LOG_INFO(ctrl, "Class %d TLS capacity: %u → %u (arm %d)",
                         c, old_cap, new_cap, arm_idx);
        }
        /* Feed the reward back to the selected arm. */
        hkm_ucb1_update(&ctrl->tls_cap_bandit[c], arm_idx, reward);

        /* Drain threshold adjustment. */
        int drain_arm = hkm_ucb1_select(&ctrl->drain_bandit[c]);
        uint32_t new_thresh = hkm_ucb1_get_value(&ctrl->drain_bandit[c], drain_arm);
        if (new_thresh != ctrl->drain_threshold[c]) {
            uint32_t old_thresh = ctrl->drain_threshold[c];
            ctrl->drain_threshold[c] = new_thresh;
            hkm_ace_set_drain_threshold(c, new_thresh); // Apply to Tiny Pool
            ACE_LOG_INFO(ctrl, "Class %d drain threshold: %u → %u (arm %d)",
                         c, old_thresh, new_thresh, drain_arm);
        }
        /* Feed the reward back to the selected arm. */
        hkm_ucb1_update(&ctrl->drain_bandit[c], drain_arm, reward);
    }
}
/* ========== Slow Loop ========== */
/*
 * Run one slow-loop iteration: collect the slow metrics into ctrl->current
 * and log them at debug level. No tuning action is taken yet (see TODO).
 *
 * ctrl is dereferenced unconditionally and must be non-NULL.
 */
void hkm_ace_controller_slow_loop(struct hkm_ace_controller *ctrl) {
    /* Collect the slow metrics. */
    hkm_ace_metrics_collect_slow(&ctrl->current);

    /* rss_mb is widened to uint64_t and printed with PRIu64 so the format
     * matches the argument whatever the width of unsigned long. */
    ACE_LOG_DEBUG(ctrl, "Slow loop: fragmentation=%.3f, rss=%" PRIu64 " MB",
                  ctrl->current.fragmentation_ratio,
                  (uint64_t)ctrl->current.rss_mb);

    /* TODO: fragmentation countermeasures (to be implemented in Phase 2)
     * - fragmentation_ratio > 0.7 -> partial release
     * - rss_mb > threshold        -> budgeted scavenge
     */
}
/* ========== Tick handling ========== */
/*
 * Run whichever loops are due.
 *
 * Reads the monotonic clock in milliseconds and runs the fast loop and/or
 * the slow loop when at least fast_interval_ms / slow_interval_ms have
 * elapsed since that loop's previous run, recording the new run time.
 * Does nothing for a NULL or disabled controller.
 */
void hkm_ace_controller_tick(struct hkm_ace_controller *ctrl) {
    if (ctrl == NULL || !ctrl->enabled) {
        return;
    }

    struct timespec mono;
    clock_gettime(CLOCK_MONOTONIC, &mono);
    const uint64_t whole_sec_ms = (uint64_t)mono.tv_sec * 1000ULL;
    const uint64_t frac_sec_ms = (uint64_t)mono.tv_nsec / 1000000ULL;
    const uint64_t now_ms = whole_sec_ms + frac_sec_ms;

    /* Is the fast loop due? */
    const uint64_t since_fast_ms = now_ms - ctrl->last_fast_tick_ms;
    if (since_fast_ms >= ctrl->fast_interval_ms) {
        hkm_ace_controller_fast_loop(ctrl);
        ctrl->last_fast_tick_ms = now_ms;
    }

    /* Is the slow loop due? */
    const uint64_t since_slow_ms = now_ms - ctrl->last_slow_tick_ms;
    if (since_slow_ms >= ctrl->slow_interval_ms) {
        hkm_ace_controller_slow_loop(ctrl);
        ctrl->last_slow_tick_ms = now_ms;
    }
}
/* ========== Background thread ========== */
/*
 * Background thread entry point. `arg` is the controller. The thread calls
 * hkm_ace_controller_tick() and then sleeps 100 ms, repeating until
 * stop_requested is observed as set. Always returns NULL.
 *
 * NOTE(review): stop_requested is written by hkm_ace_controller_stop() on
 * another thread; whether the field is declared atomic is not visible
 * here. Confirm against the header.
 */
static void* hkm_ace_bg_thread_main(void *arg) {
    struct hkm_ace_controller *const self = arg;

    ACE_LOG_INFO(self, "ACE background thread started");

    for (;;) {
        if (self->stop_requested) {
            break;
        }
        hkm_ace_controller_tick(self);
        usleep(100000); /* sleep for 100 ms */
    }

    ACE_LOG_INFO(self, "ACE background thread stopped");
    return NULL;
}
/*
 * Start the background thread.
 *
 * Does nothing for a NULL, disabled or already-running controller. Clears
 * stop_requested and marks the controller running before creating the
 * thread; if pthread_create() fails, the error is reported on stderr and
 * running is reset to false.
 */
void hkm_ace_controller_start(struct hkm_ace_controller *ctrl) {
    if (ctrl == NULL) {
        return;
    }
    if (!ctrl->enabled) {
        return;
    }
    if (ctrl->running) {
        return;
    }

    ctrl->stop_requested = false;
    ctrl->running = true;

    const int rc = pthread_create(&ctrl->bg_thread, NULL, hkm_ace_bg_thread_main, ctrl);
    if (rc == 0) {
        ACE_LOG_INFO(ctrl, "ACE background thread creation successful");
        return;
    }

    /* Thread creation failed: report it and undo the running flag. */
    fprintf(stderr, "[ACE ERROR] Failed to create background thread: %d\n", rc);
    ctrl->running = false;
}
/*
 * Stop the background thread.
 *
 * Does nothing for a NULL controller or one that is not running. Otherwise
 * sets stop_requested, joins the background thread, and then clears the
 * running flag.
 */
void hkm_ace_controller_stop(struct hkm_ace_controller *ctrl) {
    if (ctrl == NULL) {
        return;
    }
    if (!ctrl->running) {
        return;
    }

    ACE_LOG_INFO(ctrl, "Stopping ACE background thread...");

    /* Ask the thread to leave its loop, then wait for it to finish. */
    ctrl->stop_requested = true;
    pthread_join(ctrl->bg_thread, NULL);
    ctrl->running = false;

    ACE_LOG_INFO(ctrl, "ACE background thread joined");
}