Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
72 lines
2.3 KiB
C
72 lines
2.3 KiB
C
#ifndef HAKMEM_PROF_H
|
|
#define HAKMEM_PROF_H
|
|
|
|
#include <stdint.h>
|
|
#include <time.h>
|
|
|
|
// Lightweight, sampling-based profiler for hot paths (default OFF).
|
|
// Enable via env HAKMEM_PROF=1 and optional HAKMEM_PROF_SAMPLE=N (sample every 2^N calls, default 12).
|
|
|
|
/*
 * Profiling categories.
 *
 * Enumerators are numbered consecutively starting at 0, so HKP_COUNT (always
 * the last entry) equals the number of categories. New categories must be
 * inserted before HKP_COUNT.
 */
typedef enum {
    HKP_BIGCACHE_TRY = 0,
    HKP_TINY_ALLOC,
    HKP_ACE_ALLOC,          // L1 (Mid/Large) unified
    HKP_MALLOC_ALLOC,
    HKP_MMAP_ALLOC,

    // Fine-grained (sampling) categories
    HKP_TINY_BITMAP,        // tiny bitmap scan
    HKP_TINY_DRAIN_LOCKED,  // tiny remote drain under lock
    HKP_TINY_DRAIN_OWNER,   // tiny remote drain owner
    HKP_TINY_SPILL,         // tiny magazine spill under lock
    HKP_TINY_REG_LOOKUP,    // tiny registry lookup
    HKP_TINY_REG_REGISTER,  // tiny registry register
    HKP_POOL_REFILL,        // L2 pool refill
    HKP_POOL_LOCK,          // L2 pool lock wait
    HKP_L25_REFILL,         // L2.5 pool refill
    HKP_L25_LOCK,           // L2.5 lock wait

    HKP_COUNT               // number of categories; keep last
} hkp_cat_t;
|
|
|
|
// Compile-time instrumentation gate.
// Define HAKMEM_PROF_STATIC=1 at compile time (e.g. -DHAKMEM_PROF_STATIC=1) to
// include the instrumentation code paths; otherwise the profiler entry points
// become no-op macros and no profiler symbols are referenced.
// Defaults to 0 (instrumentation compiled out) when the build does not set it.
#ifndef HAKMEM_PROF_STATIC
#define HAKMEM_PROF_STATIC 0
#endif
|
|
|
|
#if HAKMEM_PROF_STATIC
|
|
void hkm_prof_init(void);
|
|
void hkm_prof_shutdown(void);
|
|
|
|
int hkm_prof_enabled(void);
|
|
int hkm_prof_should_sample(void);
|
|
void hkm_prof_record_ns(hkp_cat_t cat, uint64_t ns);
|
|
void hkm_prof_inc(hkp_cat_t cat);
|
|
|
|
// Helpers for scoped timing (only when sampling)
|
|
static inline int hkm_prof_begin(struct timespec* ts) {
|
|
if (!hkm_prof_should_sample()) return 0;
|
|
clock_gettime(CLOCK_MONOTONIC, ts);
|
|
return 1;
|
|
}
|
|
|
|
// Ends a timed region opened by hkm_prof_begin() and records its duration.
//
// active: return value of the matching hkm_prof_begin(); 0 makes this a no-op.
// cat:    category the elapsed time is recorded under.
// ts:     start time written by the matching hkm_prof_begin().
//
// The elapsed CLOCK_MONOTONIC time is passed to hkm_prof_record_ns() in
// nanoseconds. Nothing is recorded if the clock cannot be read or the computed
// interval is negative.
static inline void hkm_prof_end(int active, hkp_cat_t cat, struct timespec* ts) {
    if (!active) {
        return;
    }

    struct timespec te;
    if (clock_gettime(CLOCK_MONOTONIC, &te) != 0) {
        return;
    }

    // Work in signed 64-bit: the nanosecond field difference is negative
    // whenever the end time crosses a second boundary, and must cancel against
    // the seconds term rather than rely on unsigned wraparound.
    int64_t sec_delta = (int64_t)te.tv_sec - (int64_t)ts->tv_sec;
    int64_t nsec_delta = (int64_t)te.tv_nsec - (int64_t)ts->tv_nsec;
    int64_t elapsed_ns = sec_delta * INT64_C(1000000000) + nsec_delta;
    if (elapsed_ns < 0) {
        // A negative interval means *ts was not a valid start time; drop the
        // sample instead of recording a huge wrapped value.
        return;
    }

    hkm_prof_record_ns(cat, (uint64_t)elapsed_ns);
}
|
|
#else
|
|
// Compile-time disabled: define stubs as no-ops.
// No hkm_prof_* symbol is referenced and macro arguments are never evaluated.
// Every stub is a parenthesized expression -- ((void)0) where the real function
// returns void, (0) where it returns int -- so a call site that is valid with
// the real functions (statement, comma expression, conditional operand) is
// also valid with the stubs.
#define hkm_prof_init() ((void)0)
#define hkm_prof_shutdown() ((void)0)
#define hkm_prof_enabled() (0)
#define hkm_prof_should_sample() (0)
#define hkm_prof_record_ns(cat,ns) ((void)0)
#define hkm_prof_inc(cat) ((void)0)
#define hkm_prof_begin(ts) (0)
#define hkm_prof_end(a,cat,ts) ((void)0)
|
|
#endif
|
|
|
|
#endif // HAKMEM_PROF_H
|