Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
43 lines
1.5 KiB
C
43 lines
1.5 KiB
C
// hakmem_tiny_drain_ema.inc.h
|
|
// Per-class drain batch sizing via simple EMA for Tiny assist drain
|
|
#ifndef HAKMEM_TINY_DRAIN_EMA_INC_H
|
|
#define HAKMEM_TINY_DRAIN_EMA_INC_H
|
|
|
|
#include <stdint.h>
|
|
|
|
// Number of tiny size classes. This is only a fallback: a definition that is
// already visible when this file is included takes precedence.
#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8
#endif

// Thread-local EMA of recently drained sets, one slot per tiny size class.
// Static storage, so every slot starts at zero in each thread. Updated by
// tiny_drain_ema_update() and read by tiny_drain_target_from_ema().
static __thread uint16_t g_tls_drain_ema[TINY_NUM_CLASSES];
|
|
|
|
static inline uint16_t tiny_drain_target_from_ema(int class_idx) {
|
|
// Base drain minimum to make progress; cap to avoid long stalls
|
|
uint16_t ema = g_tls_drain_ema[class_idx];
|
|
uint16_t base = 8; // minimum sets to try
|
|
uint16_t mult = 2; // scale EMA
|
|
uint32_t want = (uint32_t)base + (uint32_t)ema * mult;
|
|
if (want > 64u) want = 64u; // hard cap per slow-path assist
|
|
return (uint16_t)want;
|
|
}
|
|
|
|
static inline void tiny_drain_ema_update(int class_idx, uint16_t drained_sets) {
|
|
// EMA with alpha=1/4: ema = (3*ema + drained) / 4
|
|
uint32_t ema = g_tls_drain_ema[class_idx];
|
|
uint32_t upd = (3u * ema + (uint32_t)drained_sets) >> 2;
|
|
if (upd > 1024u) upd = 1024u; // clamp
|
|
g_tls_drain_ema[class_idx] = (uint16_t)upd;
|
|
}
|
|
|
|
/*
 * Class-specific enqueue threshold for SS target (smaller for tiny sizes).
 *
 * class_idx: tiny size-class index.
 *
 * Returns 8 for class_idx <= 2, 16 for class_idx 3..4, and 24 otherwise.
 * Any value at or below 2 (including negative values) maps to 8. The byte
 * sizes in the inline comments are the original author's annotations for
 * each class range.
 */
static inline uint16_t tiny_enqueue_threshold(int class_idx) {
    if (class_idx <= 2) {
        return 8;  // 8/16/32B
    }
    if (class_idx <= 4) {
        return 16; // 64/128B
    }
    return 24;     // 256B+
}
|
|
|
|
// Note: free-path periodic assist is defined in hakmem_tiny_assist.inc.h
|
|
|
|
#endif // HAKMEM_TINY_DRAIN_EMA_INC_H
|