Files
hakmem/core/hakmem_debug.h

198 lines
5.5 KiB
C
Raw Permalink Normal View History

// hakmem_debug.h - Debug Timing Infrastructure (Box Theory: measurement box)
// Purpose: Lightweight timing measurement for performance analysis
//
// License: MIT
// Date: 2025-10-21
#pragma once
#include <stdint.h>
#include <time.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Configuration (controllable via environment variables)
// ============================================================================
// Compile-time switch: define HAKMEM_DEBUG_TIMING=1 in the build to compile
// the timing code in. When the build leaves it undefined it defaults to 0
// here (OFF), and the HKM_* macros in this header expand to placeholders.
#if !defined(HAKMEM_DEBUG_TIMING)
#  define HAKMEM_DEBUG_TIMING 0
#endif
// Runtime guard: HAKMEM_TIMING=1 enables timing at runtime
// Sampling rate: HAKMEM_TIMING_SAMPLE=N (default: 64, means 1/64 sampling)
// ============================================================================
// Timing Categories (measurement category IDs)
// ============================================================================
// Identifier of every timing/counting bucket. Values are assigned
// sequentially from 0, so HKM_CAT_MAX equals the number of buckets.
// Enumerator order determines the numeric IDs.
typedef enum {
    // --- System calls and the malloc fallback ---
    HKM_CAT_SYSCALL_MMAP = 0,
    HKM_CAT_SYSCALL_MUNMAP,
    HKM_CAT_SYSCALL_MADVISE,
    HKM_CAT_FALLBACK_MALLOC,

    // --- Whale fast-path ---
    HKM_CAT_WHALE_GET,
    HKM_CAT_WHALE_PUT,

    // --- L2.5 pool (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_L25_GET,
    HKM_CAT_L25_PUT,
    HKM_CAT_L25_REFILL,

    // --- Tiny pool (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_TINY_ALLOC,
    HKM_CAT_TINY_FREE,
    HKM_CAT_TINY_SLAB_SEARCH,

    // --- BigCache (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_BIGCACHE_GET,
    HKM_CAT_BIGCACHE_PUT,
    HKM_CAT_BIGCACHE_EVICT_SCAN,

    // --- Site rules (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_SITE_RULES_LOOKUP,
    HKM_CAT_SITE_RULES_ADOPT,

    // --- ELO learning (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_ELO_SELECT,
    HKM_CAT_ELO_UPDATE,

    // --- Top-level API (added in Phase 6.11.3 for profiling) ---
    HKM_CAT_HAK_ALLOC,
    HKM_CAT_HAK_FREE,

    // --- Legacy buckets, retained for compatibility ---
    HKM_CAT_REGION_GET,
    HKM_CAT_REGION_PUT,
    HKM_CAT_HASH_FNV1A,

    // --- Mid (L2 pool), fine-grained ---
    HKM_CAT_POOL_GET,
    HKM_CAT_POOL_LOCK,
    HKM_CAT_POOL_REFILL,
    HKM_CAT_TC_DRAIN,
    HKM_CAT_TLS_FAST,
    HKM_CAT_SHARD_STEAL,

    // --- Mid (L2 pool), additional fine-grained buckets ---
    HKM_CAT_POOL_TLS_RING_POP,
    HKM_CAT_POOL_TLS_LIFO_POP,
    HKM_CAT_POOL_REMOTE_PUSH,
    HKM_CAT_POOL_ALLOC_TLS_PAGE,

    // --- L2.5 (LargePool), fine-grained ---
    HKM_CAT_L25_LOCK,
    HKM_CAT_L25_TLS_RING_POP,
    HKM_CAT_L25_TLS_LIFO_POP,
    HKM_CAT_L25_TC_DRAIN,
    HKM_CAT_L25_REMOTE_PUSH,
    HKM_CAT_L25_ALLOC_TLS_PAGE,
    HKM_CAT_L25_SHARD_STEAL,

    // Sentinel: count of the categories above. Keep it last.
    HKM_CAT_MAX
} HkmTimingCategory;
// ============================================================================
// Timing Statistics (TLS per-thread)
// ============================================================================
// Accumulated measurements for one timing category.
typedef struct {
    uint64_t count;   // number of recorded calls
    uint64_t cycles;  // summed elapsed time: TSC cycles or nanoseconds
} HkmTimingStat;
// ============================================================================
// Public API
// ============================================================================
// --- Lifecycle ---
// Set up the timing subsystem; intended to be called from hak_init.
void hkm_timing_init(void);
// Tear down the timing subsystem; intended to be called from hak_shutdown.
// Per the original note, shutdown also dumps the collected statistics.
void hkm_timing_shutdown(void);
// Dump all statistics; meant to be called at exit or on demand.
void hkm_timing_dump(void);
// --- Recording ---
// Current timestamp (TSC or clock_gettime). Forward declaration only: the
// inline definition appears further down in this header.
static inline uint64_t hkm_tsc_now(void);
// Increment the counter of category `cat`.
void hkm_count_inc(HkmTimingCategory cat);
// Add `cycles` to the accumulated total of category `cat`.
void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles);
// ============================================================================
// Timing Macros (inlined; compiled out when timing is disabled at build time)
// ============================================================================
#if HAKMEM_DEBUG_TIMING
// HKM_TIME_START(var)
//   Declares a uint64_t named `var` in the current scope, initialized with
//   the current timestamp. It expands to a declaration, so use it only where
//   a declaration is allowed and pass a fresh identifier.
#define HKM_TIME_START(var) \
    uint64_t var = hkm_tsc_now()
// HKM_TIME_END(cat, var)
//   Adds the time elapsed since HKM_TIME_START(var) to category `cat` and
//   increments that category's counter.
//   - The end timestamp is read first, before `cat` is evaluated.
//   - `cat` is evaluated exactly once, matching the disabled variant, so an
//     argument with side effects behaves the same in both build modes.
//   - Locals carry an hkm_time_ prefix so they do not shadow a caller
//     variable that happens to be called `_end`.
#define HKM_TIME_END(cat, var) \
    do { \
        const uint64_t hkm_time_end_ = hkm_tsc_now(); \
        const HkmTimingCategory hkm_time_cat_ = (cat); \
        hkm_cycles_add(hkm_time_cat_, hkm_time_end_ - (var)); \
        hkm_count_inc(hkm_time_cat_); \
    } while (0)
// HKM_CNT(cat)
//   Increments the counter of category `cat` without timing anything.
//   Expands to a void expression.
#define HKM_CNT(cat) \
    hkm_count_inc(cat)
#else
// Timing disabled at build time. The macros keep the same shape as the
// enabled versions so call sites compile unchanged:
//   - HKM_TIME_START still declares `var` (set to 0),
//   - HKM_TIME_END only marks its arguments as used,
//   - HKM_CNT is a void expression, like the enabled form, so it can appear
//     anywhere the enabled form can (e.g. inside a comma expression).
#define HKM_TIME_START(var) \
    uint64_t var = 0
#define HKM_TIME_END(cat, var) \
    do { (void)(cat); (void)(var); } while (0)
#define HKM_CNT(cat) \
    ((void)(cat))
#endif
// ============================================================================
// Internal: TSC/Clock Implementation (inline for zero overhead)
// ============================================================================
#if HAKMEM_DEBUG_TIMING
// hkm_tsc_now - current timestamp used by the HKM_TIME_* macros.
//   x86 / x86-64: the 64-bit time-stamp counter read with RDTSC.
//   other targets: a monotonic clock read with clock_gettime, in nanoseconds.
// Returns 0 when the clock cannot be read, instead of a value computed from
// an uninitialized timespec.
static inline uint64_t hkm_tsc_now(void) {
#if defined(__x86_64__) || defined(__i386__)
    // RDTSC delivers the counter split across two 32-bit registers
    // (low half in EAX, high half in EDX); recombine them.
    uint32_t lo, hi;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)hi << 32) | lo;
#else
    // CLOCK_MONOTONIC_RAW is not defined on every platform; use
    // CLOCK_MONOTONIC where it is missing.
#if defined(CLOCK_MONOTONIC_RAW)
    const clockid_t clk = CLOCK_MONOTONIC_RAW;
#else
    const clockid_t clk = CLOCK_MONOTONIC;
#endif
    struct timespec ts;
    if (clock_gettime(clk, &ts) != 0) {
        return 0;
    }
    return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
#endif
}
#else
// Timing disabled at build time: stub that always returns 0 so that code
// referring to hkm_tsc_now() still compiles.
static inline uint64_t hkm_tsc_now(void) {
    return 0;
}
#endif
#ifdef __cplusplus
}
#endif