Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
220 lines
6.7 KiB
C
220 lines
6.7 KiB
C
/* hakmem_ace_metrics.c - ACE Learning Layer Metrics Collection */
|
||
|
||
#include "hakmem_ace_metrics.h"
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <time.h>
|
||
#include <unistd.h>
|
||
|
||
/* Global counters (atomic, so they can be updated quickly). */
_Atomic uint64_t g_ace_alloc_count = 0;
_Atomic uint64_t g_ace_free_count = 0;
_Atomic uint64_t g_ace_mutex_wait_ns = 0;

/* Counter values and timestamp saved at the previous collection (for deltas). */
static uint64_t s_prev_alloc_count = 0;
static uint64_t s_prev_free_count = 0;
static uint64_t s_prev_timestamp_ms = 0;

/* Used for LLC miss monitoring: true when the monitoring check succeeded. */
static bool s_llc_available = false;
|
||
|
||
/* ========== 初期化・破棄 ========== */
|
||
|
||
void hkm_ace_metrics_init(void) {
|
||
/* カウンタリセット */
|
||
atomic_store_explicit(&g_ace_alloc_count, 0, memory_order_relaxed);
|
||
atomic_store_explicit(&g_ace_free_count, 0, memory_order_relaxed);
|
||
atomic_store_explicit(&g_ace_mutex_wait_ns, 0, memory_order_relaxed);
|
||
|
||
s_prev_alloc_count = 0;
|
||
s_prev_free_count = 0;
|
||
|
||
struct timespec ts;
|
||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||
s_prev_timestamp_ms = (uint64_t)ts.tv_sec * 1000ULL + (uint64_t)ts.tv_nsec / 1000000ULL;
|
||
|
||
/* LLC miss監視が利用可能かチェック */
|
||
s_llc_available = hkm_ace_llc_available();
|
||
}
|
||
|
||
/* Teardown counterpart of hkm_ace_metrics_init(); currently a no-op. */
void hkm_ace_metrics_destroy(void)
{
    /* Nothing in particular to do. */
}
|
||
|
||
/* ========== 現在時刻取得(ミリ秒) ========== */
|
||
|
||
static inline uint64_t get_monotonic_ms(void) {
|
||
struct timespec ts;
|
||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||
return (uint64_t)ts.tv_sec * 1000ULL + (uint64_t)ts.tv_nsec / 1000000ULL;
|
||
}
|
||
|
||
/* ========== スループット計算 ========== */
|
||
|
||
static void collect_throughput(struct hkm_ace_metrics *out) {
|
||
uint64_t now_ms = get_monotonic_ms();
|
||
uint64_t elapsed_ms = (now_ms > s_prev_timestamp_ms) ? (now_ms - s_prev_timestamp_ms) : 1;
|
||
|
||
uint64_t curr_alloc = atomic_load_explicit(&g_ace_alloc_count, memory_order_relaxed);
|
||
uint64_t curr_free = atomic_load_explicit(&g_ace_free_count, memory_order_relaxed);
|
||
|
||
uint64_t delta_alloc = curr_alloc - s_prev_alloc_count;
|
||
uint64_t delta_free = curr_free - s_prev_free_count;
|
||
uint64_t delta_ops = delta_alloc + delta_free;
|
||
|
||
/* ops/sec = delta_ops / (elapsed_ms / 1000.0) */
|
||
out->throughput_ops = (delta_ops * 1000ULL) / elapsed_ms;
|
||
|
||
/* 次回用に保存 */
|
||
s_prev_alloc_count = curr_alloc;
|
||
s_prev_free_count = curr_free;
|
||
s_prev_timestamp_ms = now_ms;
|
||
out->timestamp_ms = now_ms;
|
||
}
|
||
|
||
/* ========== LLC miss rate 計算 ========== */
|
||
|
||
static void collect_llc_miss_rate(struct hkm_ace_metrics *out) {
|
||
if (!s_llc_available) {
|
||
out->llc_miss_rate = 0.0;
|
||
return;
|
||
}
|
||
|
||
uint64_t misses = 0, references = 0;
|
||
hkm_ace_llc_read(&misses, &references);
|
||
|
||
if (references > 0) {
|
||
out->llc_miss_rate = (double)misses / (double)references;
|
||
} else {
|
||
out->llc_miss_rate = 0.0;
|
||
}
|
||
}
|
||
|
||
/* ========== Mutex contention 計算 ========== */
|
||
|
||
static void collect_mutex_wait(struct hkm_ace_metrics *out) {
|
||
uint64_t total_wait_ns = atomic_load_explicit(&g_ace_mutex_wait_ns, memory_order_relaxed);
|
||
out->mutex_wait_ns = total_wait_ns;
|
||
|
||
/* リセット(次の窓用) */
|
||
atomic_store_explicit(&g_ace_mutex_wait_ns, 0, memory_order_relaxed);
|
||
}
|
||
|
||
/* ========== Remote free backlog 読み取り ========== */
|
||
|
||
extern uint32_t hkm_tiny_get_remote_backlog(uint8_t class_idx); /* tiny poolから取得 */
|
||
|
||
static void collect_remote_backlog(struct hkm_ace_metrics *out) {
|
||
for (int c = 0; c < 8; c++) {
|
||
/* TODO: tiny poolの実装に合わせて調整 */
|
||
out->remote_free_backlog[c] = 0; /* プレースホルダー */
|
||
}
|
||
}
|
||
|
||
/* ========== Fragmentation ratio 計算(/proc/self/status読み取り) ========== */
|
||
|
||
static void collect_fragmentation_and_rss(struct hkm_ace_metrics *out) {
|
||
FILE *fp = fopen("/proc/self/status", "r");
|
||
if (!fp) {
|
||
out->fragmentation_ratio = 1.0;
|
||
out->rss_mb = 0;
|
||
return;
|
||
}
|
||
|
||
uint64_t vm_size_kb = 0;
|
||
uint64_t vm_rss_kb = 0;
|
||
|
||
char line[256];
|
||
while (fgets(line, sizeof(line), fp)) {
|
||
if (strncmp(line, "VmSize:", 7) == 0) {
|
||
sscanf(line + 7, "%lu", &vm_size_kb);
|
||
} else if (strncmp(line, "VmRSS:", 6) == 0) {
|
||
sscanf(line + 6, "%lu", &vm_rss_kb);
|
||
}
|
||
}
|
||
fclose(fp);
|
||
|
||
/* Fragmentation ratio = RSS / VmSize */
|
||
if (vm_size_kb > 0) {
|
||
out->fragmentation_ratio = (double)vm_rss_kb / (double)vm_size_kb;
|
||
} else {
|
||
out->fragmentation_ratio = 1.0;
|
||
}
|
||
|
||
/* RSS in MB */
|
||
out->rss_mb = vm_rss_kb / 1024;
|
||
}
|
||
|
||
/* ========== Fast metrics 収集(0.5-1s間隔) ========== */
|
||
|
||
void hkm_ace_metrics_collect_fast(struct hkm_ace_metrics *out) {
|
||
memset(out, 0, sizeof(*out));
|
||
|
||
collect_throughput(out);
|
||
collect_llc_miss_rate(out);
|
||
collect_mutex_wait(out);
|
||
collect_remote_backlog(out);
|
||
}
|
||
|
||
/* ========== Slow metrics collection (30-60 s interval) ========== */

/*
 * Collect the slow metrics (fragmentation ratio and RSS) into *out.
 * Unlike the fast collector, this does not clear *out first.
 */
void hkm_ace_metrics_collect_slow(struct hkm_ace_metrics *out)
{
    collect_fragmentation_and_rss(out);
}
|
||
|
||
/* ========== Full metrics collection ========== */

/*
 * Collect every metric into *out: the fast set first (which zeroes *out),
 * then the slow set on top of it.
 */
void hkm_ace_metrics_collect(struct hkm_ace_metrics *out)
{
    hkm_ace_metrics_collect_fast(out);
    hkm_ace_metrics_collect_slow(out);
}
|
||
|
||
/* ========== LLC miss monitoring (rdpmc wrapper) ========== */

/*
 * Check whether rdpmc can be used.
 * Note: requires a Linux kernel with CONFIG_PERF_EVENTS=y and
 *       /proc/sys/kernel/perf_event_paranoid <= 1.
 *
 * This is a simplified check: only the perf_event_paranoid value is read.
 * Returns true when that value is <= 1, and false when it is larger or when
 * the file cannot be opened or parsed.
 */
bool hkm_ace_llc_available(void) {
    FILE *fp = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
    if (!fp) {
        return false;
    }

    int paranoid = 2; /* default value; treated as "not available" */
    const int fields_read = fscanf(fp, "%d", &paranoid);
    fclose(fp);

    /* An unreadable value is treated like the restrictive default. */
    if (fields_read != 1) {
        return false;
    }

    /* rdpmc is usable when paranoid <= 1 */
    return paranoid <= 1;
}
|
||
|
||
/*
 * Read the LLC miss / reference counters.
 * Note: Phase 1 is a simplified placeholder that always reports 0 misses and
 *       1 reference (1 rather than 0 so callers never divide by zero).
 *       Phase 3 is planned to replace it with a real implementation based
 *       on perf_event_open().
 *
 * misses, references: output pointers; either may be NULL, in which case
 * that value is simply not written.
 */
void hkm_ace_llc_read(uint64_t *misses, uint64_t *references) {
    /* Phase 1: placeholder implementation */
    if (misses != NULL) {
        *misses = 0;
    }
    if (references != NULL) {
        *references = 1; /* avoids division by zero in the caller */
    }

    /* TODO Phase 3: implement with perf_event_open() + rdpmc()
     *
     * struct perf_event_attr attr;
     * memset(&attr, 0, sizeof(attr));
     * attr.type = PERF_TYPE_HARDWARE;
     * attr.config = PERF_COUNT_HW_CACHE_MISSES;
     * int fd_miss = perf_event_open(&attr, 0, -1, -1, 0);
     *
     * attr.config = PERF_COUNT_HW_CACHE_REFERENCES;
     * int fd_ref = perf_event_open(&attr, 0, -1, -1, 0);
     *
     * read(fd_miss, misses, sizeof(uint64_t));
     * read(fd_ref, references, sizeof(uint64_t));
     */
}
|