// hakmem_debug.h - Debug Timing Infrastructure (Box Theory: measurement box)
// Purpose: Lightweight timing measurement for performance analysis
//
// License: MIT
// Date: 2025-10-21

#pragma once

#include <stdint.h>  // uint64_t, uint32_t
#include <time.h>    // struct timespec, clock_gettime (non-x86 timestamp path)

#ifdef __cplusplus
extern "C" {
#endif

// ============================================================================
// Configuration (controllable via environment variables)
// ============================================================================

// Build-time guard: HAKMEM_DEBUG_TIMING=1 enables all timing code
#ifndef HAKMEM_DEBUG_TIMING
#define HAKMEM_DEBUG_TIMING 0  // Default: OFF (zero overhead)
#endif

// Runtime guard: HAKMEM_TIMING=1 enables timing at runtime
// Sampling rate: HAKMEM_TIMING_SAMPLE=N (default: 64, means 1/64 sampling)

// ============================================================================
// Timing Categories (measurement category IDs)
// ============================================================================

// Category IDs are consecutive integers starting at 0; HKM_CAT_MAX is the
// number of categories. Append new categories immediately before HKM_CAT_MAX
// so existing numeric values stay stable.
typedef enum {
    // Syscalls and fallbacks
    HKM_CAT_SYSCALL_MMAP = 0,
    HKM_CAT_SYSCALL_MUNMAP,
    HKM_CAT_SYSCALL_MADVISE,
    HKM_CAT_FALLBACK_MALLOC,

    // Whale Fast-Path
    HKM_CAT_WHALE_GET,
    HKM_CAT_WHALE_PUT,

    // L2.5 Pool (Phase 6.11.3: Added for profiling)
    HKM_CAT_L25_GET,
    HKM_CAT_L25_PUT,
    HKM_CAT_L25_REFILL,

    // Tiny Pool (Phase 6.11.3: Added for profiling)
    HKM_CAT_TINY_ALLOC,
    HKM_CAT_TINY_FREE,
    HKM_CAT_TINY_SLAB_SEARCH,

    // BigCache (Phase 6.11.3: Added for profiling)
    HKM_CAT_BIGCACHE_GET,
    HKM_CAT_BIGCACHE_PUT,
    HKM_CAT_BIGCACHE_EVICT_SCAN,

    // Site Rules (Phase 6.11.3: Added for profiling)
    HKM_CAT_SITE_RULES_LOOKUP,
    HKM_CAT_SITE_RULES_ADOPT,

    // ELO Learning (Phase 6.11.3: Added for profiling)
    HKM_CAT_ELO_SELECT,
    HKM_CAT_ELO_UPDATE,

    // Top-level API (Phase 6.11.3: Added for profiling)
    HKM_CAT_HAK_ALLOC,
    HKM_CAT_HAK_FREE,

    // Legacy (keep for compatibility)
    HKM_CAT_REGION_GET,
    HKM_CAT_REGION_PUT,
    HKM_CAT_HASH_FNV1A,

    // Mid (L2 Pool) fine-grained
    HKM_CAT_POOL_GET,
    HKM_CAT_POOL_LOCK,
    HKM_CAT_POOL_REFILL,
    HKM_CAT_TC_DRAIN,
    HKM_CAT_TLS_FAST,
    HKM_CAT_SHARD_STEAL,

    // Additional Mid (L2) fine-grained (NEW)
    HKM_CAT_POOL_TLS_RING_POP,
    HKM_CAT_POOL_TLS_LIFO_POP,
    HKM_CAT_POOL_REMOTE_PUSH,
    HKM_CAT_POOL_ALLOC_TLS_PAGE,

    // L2.5 (LargePool) fine-grained (NEW)
    HKM_CAT_L25_LOCK,
    HKM_CAT_L25_TLS_RING_POP,
    HKM_CAT_L25_TLS_LIFO_POP,
    HKM_CAT_L25_TC_DRAIN,
    HKM_CAT_L25_REMOTE_PUSH,
    HKM_CAT_L25_ALLOC_TLS_PAGE,
    HKM_CAT_L25_SHARD_STEAL,

    HKM_CAT_MAX  // Total number of categories
} HkmTimingCategory;

// ============================================================================
// Timing Statistics (TLS per-thread)
// ============================================================================

typedef struct {
    uint64_t count;   // Number of calls
    uint64_t cycles;  // Total cycles (TSC or nanoseconds)
} HkmTimingStat;

// ============================================================================
// Public API
// ============================================================================

// Initialize timing subsystem (called from hak_init)
void hkm_timing_init(void);

// Shutdown timing subsystem (called from hak_shutdown, dumps stats)
void hkm_timing_shutdown(void);

// Get current timestamp (TSC or clock_gettime); defined at the bottom of this
// header. Returns 0 when HAKMEM_DEBUG_TIMING is 0.
static inline uint64_t hkm_tsc_now(void);

// Increment counter for category
void hkm_count_inc(HkmTimingCategory cat);

// Add cycles to category
void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles);

// Dump all statistics (called at exit or on demand)
void hkm_timing_dump(void);

// ============================================================================
// Timing Macros (inlined; compiled out when timing is disabled at build time)
// ============================================================================

#if HAKMEM_DEBUG_TIMING

// HKM_TIME_START(var) - Start timing; declares `var` and stores the start
// timestamp in it. Expands to a declaration, so it must appear where a
// declaration is allowed.
#define HKM_TIME_START(var) \
    uint64_t var = hkm_tsc_now()

// HKM_TIME_END(cat, var) - End timing; adds the time elapsed since `var` to
// category `cat` and bumps its call counter. `cat` is evaluated exactly once;
// the hkm_-prefixed temporaries avoid shadowing a caller's start variable.
#define HKM_TIME_END(cat, var) \
    do { \
        HkmTimingCategory hkm_time_cat_ = (cat); \
        uint64_t hkm_time_elapsed_ = hkm_tsc_now() - (var); \
        hkm_cycles_add(hkm_time_cat_, hkm_time_elapsed_); \
        hkm_count_inc(hkm_time_cat_); \
    } while (0)

// HKM_CNT(cat) - Increment count only (no timing)
#define HKM_CNT(cat) \
    hkm_count_inc(cat)

#else

// Build-time disabled: variables are still declared (so call sites compile
// unchanged and produce no unused-variable warnings) but no timing code runs.
#define HKM_TIME_START(var) \
    uint64_t var = 0

#define HKM_TIME_END(cat, var) \
    do { (void)(cat); (void)(var); } while (0)

#define HKM_CNT(cat) \
    do { (void)(cat); } while (0)

#endif

// ============================================================================
// Internal: TSC/Clock Implementation (inline for zero overhead)
// ============================================================================

#if HAKMEM_DEBUG_TIMING

// Returns a monotonically-sourced timestamp: raw TSC ticks on x86/x86-64,
// nanoseconds from clock_gettime elsewhere. The two units are not comparable.
static inline uint64_t hkm_tsc_now(void) {
#if defined(__x86_64__) || defined(__i386__)
    // x86/x64: Use RDTSC (fast, ~10 cycles); EDX:EAX hold the 64-bit counter.
    uint32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)hi << 32) | lo;
#else
    // Non-x86: Use clock_gettime (slower, but portable).
    // CLOCK_MONOTONIC_RAW is not defined on every platform, so fall back to
    // CLOCK_MONOTONIC when it is missing.
    struct timespec ts;
#if defined(CLOCK_MONOTONIC_RAW)
    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
#else
    clock_gettime(CLOCK_MONOTONIC, &ts);
#endif
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
}

#else

// Build-time disabled: empty inline function
static inline uint64_t hkm_tsc_now(void) {
    return 0;
}

#endif

#ifdef __cplusplus
}
#endif