/*
 * NOTE(review): the following commit message and file-listing metadata were
 * pasted verbatim into the source; preserved here as a comment.
 *
 * Major Features:
 *   - Debug counter infrastructure for Refill Stage tracking
 *   - Free Pipeline counters (ss_local, ss_remote, tls_sll)
 *   - Diagnostic counters for early return analysis
 *   - Unified larson.sh benchmark runner with profiles
 *   - Phase 6-3 regression analysis documentation
 *
 * Bug Fixes:
 *   - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
 *   - Fix profile variable naming consistency
 *   - Add .gitignore patterns for large files
 *
 * Performance:
 *   - Phase 6-3: 4.79 M ops/s (has OOM risk)
 *   - With SuperSlab: 3.13 M ops/s (+19% improvement)
 *
 * This is a clean repository without large log files.
 *
 * Generated with Claude Code (https://claude.com/claude-code)
 * Co-Authored-By: Claude <noreply@anthropic.com>
 *
 * (File listing metadata: 85 lines, 2.4 KiB, C)
 */
#include "hakmem_prof.h"
|
|
#if HAKMEM_PROF_STATIC
|
|
#include <stdatomic.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
|
|
/* Per-category accumulator: sample count plus total elapsed nanoseconds,
 * so the shutdown report can compute an average latency per category. */
typedef struct {
    _Atomic uint64_t count;   /* number of samples recorded */
    _Atomic uint64_t sum_ns;  /* total nanoseconds across those samples */
} hkp_slot_t;

/* Set to 1 by hkm_prof_init() when HAKMEM_PROF=<nonzero>; every public
 * entry point checks this first so disabled builds pay one atomic load. */
static _Atomic int g_enabled = 0;
/* Sampling mask: sample when (seq & mask) == 0, i.e. 1 in (mask+1) events. */
static _Atomic uint32_t g_mask = 0; // (1<<N)-1; 0=off
/* Global event counter driving the sampling decision (shared by all threads). */
static _Atomic uint64_t g_seq_ctr = 0;
/* One accumulator per hkp_cat_t category (HKP_COUNT from hakmem_prof.h). */
static hkp_slot_t g_slots[HKP_COUNT];
|
|
|
|
void hkm_prof_init(void) {
|
|
const char* env = getenv("HAKMEM_PROF");
|
|
if (env && atoi(env) != 0) {
|
|
atomic_store(&g_enabled, 1);
|
|
const char* sm = getenv("HAKMEM_PROF_SAMPLE");
|
|
uint32_t mask = (1u << 12) - 1u; // default 1/4096
|
|
if (sm) {
|
|
int n = atoi(sm);
|
|
if (n > 0 && n < 31) mask = (1u << n) - 1u;
|
|
}
|
|
atomic_store(&g_mask, mask);
|
|
atexit(hkm_prof_shutdown);
|
|
}
|
|
}
|
|
|
|
void hkm_prof_shutdown(void) {
|
|
if (!atomic_load(&g_enabled)) return;
|
|
static const char* names[HKP_COUNT] = {
|
|
"bigcache_try",
|
|
"tiny_alloc",
|
|
"ace_alloc",
|
|
"malloc_alloc",
|
|
"mmap_alloc",
|
|
"tiny_bitmap",
|
|
"tiny_drain_locked",
|
|
"tiny_drain_owner",
|
|
"tiny_spill",
|
|
"tiny_reg_lookup",
|
|
"tiny_reg_register",
|
|
"pool_refill",
|
|
"pool_lock",
|
|
"l25_refill",
|
|
"l25_lock"
|
|
};
|
|
printf("\n================= HAKMEM PROF (sampling) =================\n");
|
|
for (int i = 0; i < HKP_COUNT; i++) {
|
|
uint64_t n = atomic_load(&g_slots[i].count);
|
|
uint64_t s = atomic_load(&g_slots[i].sum_ns);
|
|
if (n > 0) {
|
|
double avg = (double)s / (double)n;
|
|
printf("%-16s : samples=%9lu avg_ns=%.1f\n", names[i], (unsigned long)n, avg);
|
|
}
|
|
}
|
|
printf("==========================================================\n\n");
|
|
}
|
|
|
|
/* Nonzero when profiling was switched on via HAKMEM_PROF at init time. */
int hkm_prof_enabled(void)
{
    return atomic_load(&g_enabled) ? 1 : 0;
}
|
|
|
|
int hkm_prof_should_sample(void) {
|
|
if (!atomic_load(&g_enabled)) return 0;
|
|
uint32_t mask = atomic_load(&g_mask);
|
|
if (mask == 0) return 0;
|
|
uint64_t x = atomic_fetch_add(&g_seq_ctr, 1);
|
|
return ((x & mask) == 0);
|
|
}
|
|
|
|
/*
 * Fold one timed sample (elapsed nanoseconds) into the accumulator for
 * the given category. No-op when profiling is disabled.
 */
void hkm_prof_record_ns(hkp_cat_t cat, uint64_t ns)
{
    if (!atomic_load(&g_enabled)) {
        return;
    }
    hkp_slot_t* slot = &g_slots[cat];
    atomic_fetch_add(&slot->count, 1);
    atomic_fetch_add(&slot->sum_ns, ns);
}
|
|
|
|
/*
 * Count one occurrence for the given category without timing data
 * (only the sample count advances, not sum_ns). No-op when disabled.
 */
void hkm_prof_inc(hkp_cat_t cat)
{
    if (atomic_load(&g_enabled)) {
        atomic_fetch_add(&g_slots[cat].count, 1);
    }
}
|
|
|
|
#endif // HAKMEM_PROF_STATIC
|