/*
 * Commit notes
 *
 * Major Features:
 * - Debug counter infrastructure for Refill Stage tracking
 * - Free Pipeline counters (ss_local, ss_remote, tls_sll)
 * - Diagnostic counters for early return analysis
 * - Unified larson.sh benchmark runner with profiles
 * - Phase 6-3 regression analysis documentation
 *
 * Bug Fixes:
 * - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
 * - Fix profile variable naming consistency
 * - Add .gitignore patterns for large files
 *
 * Performance:
 * - Phase 6-3: 4.79 M ops/s (has OOM risk)
 * - With SuperSlab: 3.13 M ops/s (+19% improvement)
 *
 * This is a clean repository without large log files.
 *
 * 🤖 Generated with [Claude Code](https://claude.com/claude-code)
 * Co-Authored-By: Claude <noreply@anthropic.com>
 *
 * Listing metadata: 80 lines, 3.1 KiB, C.
 */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "hakx_l25_tuner.h"
#include "hakmem_l25_pool.h"
/* Handle of the background tuner thread: written by hakx_l25_tuner_start(),
 * joined by hakx_l25_tuner_stop(). Only meaningful while a thread exists. */
static pthread_t g_tuner_thread;
/* Run flag: 1 while the tuner thread should keep running, 0 otherwise.
 * Atomic because it is exchanged by start/stop and polled by the tuner loop. */
static _Atomic int g_tuner_run = 0;
/*
 * Block the calling thread for approximately `ms` milliseconds.
 *
 * Non-positive durations return immediately. If nanosleep() is interrupted
 * by a signal (EINTR), the sleep is resumed with the time that was still
 * outstanding, so the full interval elapses. Any other failure ends the
 * wait early.
 */
static inline void sleep_ms(int ms) {
    if (ms <= 0) {
        return;
    }

    struct timespec ts;
    ts.tv_sec = ms / 1000;
    ts.tv_nsec = (long)(ms % 1000) * 1000000L;

    /* On EINTR nanosleep() stores the unslept time in its second argument;
     * passing the same object lets the loop continue from where it stopped. */
    while (nanosleep(&ts, &ts) == -1 && errno == EINTR) {
        /* interrupted: retry with the remaining time */
    }
}
static void* tuner_main(void* arg) {
|
|
(void)arg;
|
|
const int interval_ms = 500; // gentle cadence
|
|
// snapshot buffers
|
|
uint64_t hits_prev[5] = {0}, misses_prev[5] = {0}, refills_prev[5] = {0}, frees_prev[5] = {0};
|
|
hak_l25_pool_stats_snapshot(hits_prev, misses_prev, refills_prev, frees_prev);
|
|
int rf = 2; // start reasonable
|
|
int th = 24;
|
|
int rb = 64;
|
|
hak_l25_set_run_factor(rf);
|
|
hak_l25_set_remote_threshold(th);
|
|
hak_l25_set_bg_remote_batch(rb);
|
|
hak_l25_set_bg_remote_enable(1);
|
|
hak_l25_set_pref_remote_first(1);
|
|
|
|
while (atomic_load(&g_tuner_run)) {
|
|
sleep_ms(interval_ms);
|
|
uint64_t hits[5], misses[5], refills[5], frees[5];
|
|
memset(hits, 0, sizeof(hits)); memset(misses, 0, sizeof(misses));
|
|
memset(refills,0,sizeof(refills)); memset(frees,0,sizeof(frees));
|
|
hak_l25_pool_stats_snapshot(hits, misses, refills, frees);
|
|
|
|
// Simple heuristic: if refills grew a lot and misses also増 → run_factor++ up to 4
|
|
// if refills増だが hitsが十分 → thresholdを少し上げて targeted drain を控える
|
|
uint64_t ref_delta = 0, miss_delta = 0, hit_delta = 0;
|
|
for (int i = 0; i < 5; i++) {
|
|
if (refills[i] > refills_prev[i]) ref_delta += (refills[i] - refills_prev[i]);
|
|
if (misses[i] > misses_prev[i]) miss_delta += (misses[i] - misses_prev[i]);
|
|
if (hits[i] > hits_prev[i]) hit_delta += (hits[i] - hits_prev[i]);
|
|
}
|
|
// store snapshots
|
|
memcpy(hits_prev, hits, sizeof(hits_prev));
|
|
memcpy(misses_prev, misses, sizeof(misses_prev));
|
|
memcpy(refills_prev, refills, sizeof(refills_prev));
|
|
memcpy(frees_prev, frees, sizeof(frees_prev));
|
|
|
|
// Adjust run factor (bounds 1..4)
|
|
if (miss_delta > hit_delta / 4 && rf < 4) { rf++; hak_l25_set_run_factor(rf); }
|
|
else if (miss_delta * 3 < hit_delta && rf > 1) { rf--; hak_l25_set_run_factor(rf); }
|
|
|
|
// Adjust targeted remote threshold (bounds 8..64)
|
|
if (ref_delta > hit_delta / 3 && th > 8) { th -= 2; hak_l25_set_remote_threshold(th); }
|
|
else if (ref_delta * 2 < hit_delta && th < 64) { th += 2; hak_l25_set_remote_threshold(th); }
|
|
|
|
// Adjust bg remote batch (bounds 32..128)
|
|
if (ref_delta > hit_delta / 2 && rb < 128) { rb += 8; hak_l25_set_bg_remote_batch(rb); }
|
|
else if (ref_delta * 2 < hit_delta && rb > 32) { rb -= 8; hak_l25_set_bg_remote_batch(rb); }
|
|
}
|
|
return NULL;
|
|
}
void hakx_l25_tuner_start(void) {
|
|
if (atomic_exchange(&g_tuner_run, 1) == 0) {
|
|
pthread_create(&g_tuner_thread, NULL, tuner_main, NULL);
|
|
}
|
|
}
void hakx_l25_tuner_stop(void) {
|
|
if (atomic_exchange(&g_tuner_run, 0) == 1) {
|
|
pthread_join(g_tuner_thread, NULL);
|
|
}
|
|
}