Debug Counters Implementation - Clean History
Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
197
core/hakmem_debug.h
Normal file
197
core/hakmem_debug.h
Normal file
@ -0,0 +1,197 @@
// hakmem_debug.h - Debug Timing Infrastructure (Box Theory: measurement box)
// Purpose: Lightweight timing measurement for performance analysis
//
// License: MIT
// Date: 2025-10-21

#pragma once

#include <stdint.h>  // uint32_t / uint64_t used by the timing types and helpers
#include <time.h>    // struct timespec / clock_gettime for the non-x86 clock path

// Give every declaration below C linkage when included from C++.
#ifdef __cplusplus
extern "C" {
#endif

// ============================================================================
// Configuration (controllable via environment variables)
// ============================================================================

// Build-time guard: HAKMEM_DEBUG_TIMING=1 enables all timing code.
// When the build does not define it, it defaults to 0 and the timing macros
// in this header expand to no-op forms.
#ifndef HAKMEM_DEBUG_TIMING
#define HAKMEM_DEBUG_TIMING 0  // Default: OFF (zero overhead)
#endif

// Runtime guard: HAKMEM_TIMING=1 enables timing at runtime
// Sampling rate: HAKMEM_TIMING_SAMPLE=N (default: 64, means 1/64 sampling)

// ============================================================================
// Timing Categories (measurement category IDs)
// ============================================================================

// One ID per measured code path. Values are assigned sequentially starting at
// 0, so HKM_CAT_MAX equals the number of categories and can size per-category
// arrays. Append new categories immediately before HKM_CAT_MAX to keep the
// existing numeric values stable.
typedef enum {
    // Syscalls and fallbacks
    HKM_CAT_SYSCALL_MMAP = 0,
    HKM_CAT_SYSCALL_MUNMAP,
    HKM_CAT_SYSCALL_MADVISE,
    HKM_CAT_FALLBACK_MALLOC,

    // Whale Fast-Path
    HKM_CAT_WHALE_GET,
    HKM_CAT_WHALE_PUT,

    // L2.5 Pool (Phase 6.11.3: Added for profiling)
    HKM_CAT_L25_GET,
    HKM_CAT_L25_PUT,
    HKM_CAT_L25_REFILL,

    // Tiny Pool (Phase 6.11.3: Added for profiling)
    HKM_CAT_TINY_ALLOC,
    HKM_CAT_TINY_FREE,
    HKM_CAT_TINY_SLAB_SEARCH,

    // BigCache (Phase 6.11.3: Added for profiling)
    HKM_CAT_BIGCACHE_GET,
    HKM_CAT_BIGCACHE_PUT,
    HKM_CAT_BIGCACHE_EVICT_SCAN,

    // Site Rules (Phase 6.11.3: Added for profiling)
    HKM_CAT_SITE_RULES_LOOKUP,
    HKM_CAT_SITE_RULES_ADOPT,

    // ELO Learning (Phase 6.11.3: Added for profiling)
    HKM_CAT_ELO_SELECT,
    HKM_CAT_ELO_UPDATE,

    // Top-level API (Phase 6.11.3: Added for profiling)
    HKM_CAT_HAK_ALLOC,
    HKM_CAT_HAK_FREE,

    // Legacy (keep for compatibility)
    HKM_CAT_REGION_GET,
    HKM_CAT_REGION_PUT,
    HKM_CAT_HASH_FNV1A,

    // Mid (L2 Pool) fine-grained
    HKM_CAT_POOL_GET,
    HKM_CAT_POOL_LOCK,
    HKM_CAT_POOL_REFILL,
    HKM_CAT_TC_DRAIN,
    HKM_CAT_TLS_FAST,
    HKM_CAT_SHARD_STEAL,
    // Additional Mid (L2) fine-grained (NEW)
    HKM_CAT_POOL_TLS_RING_POP,
    HKM_CAT_POOL_TLS_LIFO_POP,
    HKM_CAT_POOL_REMOTE_PUSH,
    HKM_CAT_POOL_ALLOC_TLS_PAGE,

    // L2.5 (LargePool) fine-grained (NEW)
    HKM_CAT_L25_LOCK,
    HKM_CAT_L25_TLS_RING_POP,
    HKM_CAT_L25_TLS_LIFO_POP,
    HKM_CAT_L25_TC_DRAIN,
    HKM_CAT_L25_REMOTE_PUSH,
    HKM_CAT_L25_ALLOC_TLS_PAGE,
    HKM_CAT_L25_SHARD_STEAL,

    HKM_CAT_MAX  // Total number of categories (not a category itself)
} HkmTimingCategory;

// ============================================================================
// Timing Statistics (TLS per-thread)
// ============================================================================

// Accumulated measurements for one timing category.
typedef struct {
    uint64_t count;   // Number of calls
    uint64_t cycles;  // Total elapsed time: TSC ticks on x86, nanoseconds elsewhere
} HkmTimingStat;

// ============================================================================
|
||||
// Public API
|
||||
// ============================================================================
|
||||
|
||||
// Initialize timing subsystem (called from hak_init)
|
||||
void hkm_timing_init(void);
|
||||
|
||||
// Shutdown timing subsystem (called from hak_shutdown, dumps stats)
|
||||
void hkm_timing_shutdown(void);
|
||||
|
||||
// Get current timestamp (TSC or clock_gettime)
|
||||
static inline uint64_t hkm_tsc_now(void);
|
||||
|
||||
// Increment counter for category
|
||||
void hkm_count_inc(HkmTimingCategory cat);
|
||||
|
||||
// Add cycles to category
|
||||
void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles);
|
||||
|
||||
// Dump all statistics (called at exit or on demand)
|
||||
void hkm_timing_dump(void);
|
||||
|
||||
// ============================================================================
// Timing Macros (inlined; compiled out at build time when disabled)
// ============================================================================

#if HAKMEM_DEBUG_TIMING

// HKM_TIME_START(var) - Start timing, stores start time in var.
// Declares `var` as a uint64_t in the current scope, so it must be used where
// a declaration is allowed and `var` must be a fresh identifier.
#define HKM_TIME_START(var) \
    uint64_t var = hkm_tsc_now()

// HKM_TIME_END(cat, var) - End timing, add elapsed time to category.
// `cat` is evaluated exactly once; the temporaries use an hkm_-prefixed name
// so they cannot shadow a caller variable passed as `var`.
#define HKM_TIME_END(cat, var)                                  \
    do {                                                        \
        HkmTimingCategory hkm_time_cat_ = (cat);                \
        uint64_t hkm_time_end_ = hkm_tsc_now();                 \
        hkm_cycles_add(hkm_time_cat_, hkm_time_end_ - (var));   \
        hkm_count_inc(hkm_time_cat_);                           \
    } while (0)

// HKM_CNT(cat) - Increment count only (no timing)
#define HKM_CNT(cat) \
    hkm_count_inc((cat))

#else

// Build-time disabled: declare variables but no timing code (zero overhead).
// The arguments are still referenced so callers get no unused-variable
// warnings and the macros stay usable in the same contexts as the enabled
// versions.
#define HKM_TIME_START(var) \
    uint64_t var = 0

#define HKM_TIME_END(cat, var) \
    do { (void)(cat); (void)(var); } while (0)

// Expression form (not do/while) to match the enabled HKM_CNT, which is a
// plain function-call expression.
#define HKM_CNT(cat) \
    ((void)(cat))

#endif

// ============================================================================
// Internal: TSC/Clock Implementation (inline for zero overhead)
// ============================================================================

#if HAKMEM_DEBUG_TIMING

// Return the current timestamp.
//   x86 / x86-64: raw TSC value read with RDTSC (unit: TSC ticks).
//   other targets: monotonic clock reading in nanoseconds.
// Returns 0 if the clock cannot be read on the non-x86 path.
static inline uint64_t hkm_tsc_now(void) {
#if defined(__x86_64__) || defined(__i386__)
    // x86/x64: Use RDTSC (fast, no syscall). RDTSC returns the low half in
    // EAX and the high half in EDX.
    uint32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)hi << 32) | lo;
#else
    // Non-x86: Use clock_gettime (slower, but portable).
    // CLOCK_MONOTONIC_RAW is not available on every POSIX system, so fall
    // back to CLOCK_MONOTONIC where it is missing.
    struct timespec ts;
#if defined(CLOCK_MONOTONIC_RAW)
    const clockid_t clock_id = CLOCK_MONOTONIC_RAW;
#else
    const clockid_t clock_id = CLOCK_MONOTONIC;
#endif
    if (clock_gettime(clock_id, &ts) != 0) {
        // Do not read an uninitialized timespec on failure.
        return 0;
    }
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
}

#else

// Build-time disabled: stub that always returns 0 so callers still compile.
static inline uint64_t hkm_tsc_now(void) {
    return 0;
}

#endif

// Close the extern "C" block opened at the top of this header.
#ifdef __cplusplus
}
#endif
Reference in New Issue
Block a user