Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
85 lines
2.6 KiB
C
85 lines
2.6 KiB
C
#include "hakmem_learn_log.h"
|
|
#include <pthread.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Global registry of rings (simple, guarded by a mutex)
|
|
static hkm_log_ring_t* g_rings[256];
|
|
static int g_ring_count = 0;
|
|
static pthread_mutex_t g_ring_mu = PTHREAD_MUTEX_INITIALIZER;
|
|
|
|
// TLS ring and RNG state
|
|
static __thread hkm_log_ring_t* t_ring = NULL;
|
|
static __thread uint32_t t_rng = 0x12345678u;
|
|
static int g_sample_exp = 0; // exponent: 0=every, 8=1/256
|
|
static int g_inited = 0;
|
|
|
|
// Wall-clock time in nanoseconds since the Unix epoch.
//
// Reads CLOCK_REALTIME, so consecutive values are not guaranteed to be
// monotonic if the system clock is adjusted. Returns 0 when the clock cannot
// be read instead of deriving a timestamp from an uninitialised timespec.
static inline uint64_t now_ns(void) {
    struct timespec ts = {0};
    if (clock_gettime(CLOCK_REALTIME, &ts) != 0) {
        return 0;
    }
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}
|
|
|
|
// Low 32 bits of the calling thread's pthread_t handle, used as a compact
// thread tag in log records and as RNG seed material.
static inline uint32_t tid_low(void) {
    const pthread_t self = pthread_self();
    const uintptr_t raw = (uintptr_t)self;
    return (uint32_t)raw;
}
|
|
|
|
void hkm_log_init_tls(void) {
|
|
if (g_inited == 0) {
|
|
const char* s = getenv("HAKMEM_LEARN_SAMPLE");
|
|
if (s) { int v = atoi(s); if (v>=0 && v<=16) g_sample_exp = v; }
|
|
g_inited = 1;
|
|
}
|
|
if (!t_ring) {
|
|
t_ring = (hkm_log_ring_t*)calloc(1, sizeof(hkm_log_ring_t));
|
|
if (!t_ring) return;
|
|
pthread_mutex_lock(&g_ring_mu);
|
|
if (g_ring_count < 256) g_rings[g_ring_count++] = t_ring;
|
|
pthread_mutex_unlock(&g_ring_mu);
|
|
// seed rng with tid
|
|
t_rng ^= tid_low();
|
|
}
|
|
}
|
|
|
|
void hkm_log_maybe(uintptr_t site, uint32_t size, uint16_t class_idx) {
|
|
if (!t_ring) hkm_log_init_tls();
|
|
if (!t_ring) return;
|
|
// 1/2^k sampling via LFSR-like xorshift
|
|
t_rng ^= t_rng << 13; t_rng ^= t_rng >> 17; t_rng ^= t_rng << 5;
|
|
if ((t_rng & ((1u<<g_sample_exp)-1u)) != 0u) return;
|
|
uint32_t h = t_ring->head;
|
|
hkm_log_entry_t* e = &t_ring->entries[h & 1023u];
|
|
e->ts_ns = now_ns();
|
|
e->site = site;
|
|
e->size = size;
|
|
e->class_idx = class_idx;
|
|
e->tid_low = tid_low();
|
|
// publish
|
|
__atomic_store_n(&t_ring->head, h+1, __ATOMIC_RELEASE);
|
|
}
|
|
|
|
void hkm_log_flush_file(const char* path) {
|
|
if (!path || !*path) return;
|
|
FILE* fp = fopen(path, "a");
|
|
if (!fp) return;
|
|
pthread_mutex_lock(&g_ring_mu);
|
|
for (int i=0;i<g_ring_count;i++) {
|
|
hkm_log_ring_t* r = g_rings[i];
|
|
if (!r) continue;
|
|
// snapshot
|
|
uint32_t head = __atomic_load_n(&r->head, __ATOMIC_ACQUIRE);
|
|
uint32_t tail = r->tail;
|
|
while (tail < head) {
|
|
hkm_log_entry_t* e = &r->entries[tail & 1023u];
|
|
fprintf(fp, "%llu,%p,%u,%u,%u\n",
|
|
(unsigned long long)e->ts_ns, (void*)e->site, e->size, (unsigned)e->class_idx, e->tid_low);
|
|
tail++;
|
|
}
|
|
r->tail = tail;
|
|
}
|
|
pthread_mutex_unlock(&g_ring_mu);
|
|
fclose(fp);
|
|
}
|
|
|