Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
84
core/hakmem_learn_log.c
Normal file
84
core/hakmem_learn_log.c
Normal file
@ -0,0 +1,84 @@
|
||||
#include "hakmem_learn_log.h"
|
||||
#include <pthread.h>
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Global registry of rings (simple, guarded by a mutex)
|
||||
static hkm_log_ring_t* g_rings[256];
|
||||
static int g_ring_count = 0;
|
||||
static pthread_mutex_t g_ring_mu = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
// TLS ring and RNG state
|
||||
static __thread hkm_log_ring_t* t_ring = NULL;
|
||||
static __thread uint32_t t_rng = 0x12345678u;
|
||||
static int g_sample_exp = 0; // exponent: 0=every, 8=1/256
|
||||
static int g_inited = 0;
|
||||
|
||||
// Returns the current CLOCK_REALTIME reading expressed as nanoseconds.
// The result of clock_gettime() itself is not inspected.
static inline uint64_t now_ns(void) {
    struct timespec now;
    clock_gettime(CLOCK_REALTIME, &now);

    const uint64_t sec_part = (uint64_t)now.tv_sec * 1000000000ull;
    const uint64_t nsec_part = (uint64_t)now.tv_nsec;
    return sec_part + nsec_part;
}
|
||||
|
||||
// Returns the low 32 bits of the calling thread's pthread_self() handle.
// The handle is converted through uintptr_t, so this relies on pthread_t being
// an integer or pointer type on the target platform.
static inline uint32_t tid_low(void) {
    const pthread_t self = pthread_self();
    const uintptr_t handle_bits = (uintptr_t)self;
    return (uint32_t)handle_bits;
}
|
||||
|
||||
void hkm_log_init_tls(void) {
|
||||
if (g_inited == 0) {
|
||||
const char* s = getenv("HAKMEM_LEARN_SAMPLE");
|
||||
if (s) { int v = atoi(s); if (v>=0 && v<=16) g_sample_exp = v; }
|
||||
g_inited = 1;
|
||||
}
|
||||
if (!t_ring) {
|
||||
t_ring = (hkm_log_ring_t*)calloc(1, sizeof(hkm_log_ring_t));
|
||||
if (!t_ring) return;
|
||||
pthread_mutex_lock(&g_ring_mu);
|
||||
if (g_ring_count < 256) g_rings[g_ring_count++] = t_ring;
|
||||
pthread_mutex_unlock(&g_ring_mu);
|
||||
// seed rng with tid
|
||||
t_rng ^= tid_low();
|
||||
}
|
||||
}
|
||||
|
||||
void hkm_log_maybe(uintptr_t site, uint32_t size, uint16_t class_idx) {
|
||||
if (!t_ring) hkm_log_init_tls();
|
||||
if (!t_ring) return;
|
||||
// 1/2^k sampling via LFSR-like xorshift
|
||||
t_rng ^= t_rng << 13; t_rng ^= t_rng >> 17; t_rng ^= t_rng << 5;
|
||||
if ((t_rng & ((1u<<g_sample_exp)-1u)) != 0u) return;
|
||||
uint32_t h = t_ring->head;
|
||||
hkm_log_entry_t* e = &t_ring->entries[h & 1023u];
|
||||
e->ts_ns = now_ns();
|
||||
e->site = site;
|
||||
e->size = size;
|
||||
e->class_idx = class_idx;
|
||||
e->tid_low = tid_low();
|
||||
// publish
|
||||
__atomic_store_n(&t_ring->head, h+1, __ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
void hkm_log_flush_file(const char* path) {
|
||||
if (!path || !*path) return;
|
||||
FILE* fp = fopen(path, "a");
|
||||
if (!fp) return;
|
||||
pthread_mutex_lock(&g_ring_mu);
|
||||
for (int i=0;i<g_ring_count;i++) {
|
||||
hkm_log_ring_t* r = g_rings[i];
|
||||
if (!r) continue;
|
||||
// snapshot
|
||||
uint32_t head = __atomic_load_n(&r->head, __ATOMIC_ACQUIRE);
|
||||
uint32_t tail = r->tail;
|
||||
while (tail < head) {
|
||||
hkm_log_entry_t* e = &r->entries[tail & 1023u];
|
||||
fprintf(fp, "%llu,%p,%u,%u,%u\n",
|
||||
(unsigned long long)e->ts_ns, (void*)e->site, e->size, (unsigned)e->class_idx, e->tid_low);
|
||||
tail++;
|
||||
}
|
||||
r->tail = tail;
|
||||
}
|
||||
pthread_mutex_unlock(&g_ring_mu);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user