2025-11-05 12:31:14 +09:00
|
|
|
// hakmem_debug.c - Debug Timing Implementation
|
|
|
|
|
//
|
|
|
|
|
// License: MIT
|
|
|
|
|
// Date: 2025-10-21
|
|
|
|
|
|
|
|
|
|
#include "hakmem_debug.h"
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
#if HAKMEM_DEBUG_TIMING
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Global State
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
static int g_timing_enabled = 0; // Runtime enable flag (set from HAKMEM_TIMING env)
|
|
|
|
|
static int g_initialized = 0;
|
|
|
|
|
|
|
|
|
|
// Thread-local statistics (lock-free, per-thread accumulation)
|
|
|
|
|
static _Thread_local HkmTimingStat g_tls_stats[HKM_CAT_MAX];
|
|
|
|
|
|
|
|
|
|
// Global statistics (accumulated from all threads at shutdown)
|
|
|
|
|
static HkmTimingStat g_global_stats[HKM_CAT_MAX];
|
|
|
|
|
|
|
|
|
|
// Category names for pretty printing (Phase 6.11.3: Updated for profiling)
|
|
|
|
|
static const char* g_category_names[HKM_CAT_MAX] = {
|
|
|
|
|
// Syscalls and fallbacks
|
|
|
|
|
"syscall_mmap",
|
|
|
|
|
"syscall_munmap",
|
|
|
|
|
"syscall_madvise",
|
|
|
|
|
"fallback_malloc",
|
|
|
|
|
|
|
|
|
|
// Whale Fast-Path
|
|
|
|
|
"whale_get",
|
|
|
|
|
"whale_put",
|
|
|
|
|
|
|
|
|
|
// L2.5 Pool
|
|
|
|
|
"l25_get",
|
|
|
|
|
"l25_put",
|
|
|
|
|
"l25_refill",
|
|
|
|
|
|
|
|
|
|
// Tiny Pool
|
|
|
|
|
"tiny_alloc",
|
|
|
|
|
"tiny_free",
|
|
|
|
|
"tiny_slab_search",
|
|
|
|
|
|
|
|
|
|
// BigCache
|
|
|
|
|
"bigcache_get",
|
|
|
|
|
"bigcache_put",
|
|
|
|
|
"bigcache_evict_scan",
|
|
|
|
|
|
|
|
|
|
// Site Rules
|
|
|
|
|
"site_rules_lookup",
|
|
|
|
|
"site_rules_adopt",
|
|
|
|
|
|
|
|
|
|
// ELO Learning
|
|
|
|
|
"elo_select",
|
|
|
|
|
"elo_update",
|
|
|
|
|
|
|
|
|
|
// Top-level API
|
|
|
|
|
"hak_alloc",
|
|
|
|
|
"hak_free",
|
|
|
|
|
|
|
|
|
|
// Legacy
|
|
|
|
|
"region_get",
|
|
|
|
|
"region_put",
|
|
|
|
|
"hash_fnv1a",
|
|
|
|
|
|
|
|
|
|
// Mid (L2 Pool) fine-grained
|
|
|
|
|
"pool_get",
|
|
|
|
|
"pool_lock",
|
|
|
|
|
"pool_refill",
|
|
|
|
|
"tc_drain",
|
|
|
|
|
"tls_fast",
|
|
|
|
|
"shard_steal",
|
|
|
|
|
// Additional Mid (L2) fine-grained (NEW)
|
|
|
|
|
"pool_tls_ring_pop",
|
|
|
|
|
"pool_tls_lifo_pop",
|
|
|
|
|
"pool_remote_push",
|
|
|
|
|
"pool_alloc_tls_page",
|
|
|
|
|
|
|
|
|
|
// L2.5 (LargePool) fine-grained (NEW)
|
|
|
|
|
"l25_lock",
|
|
|
|
|
"l25_tls_ring_pop",
|
|
|
|
|
"l25_tls_lifo_pop",
|
|
|
|
|
"l25_tc_drain",
|
|
|
|
|
"l25_remote_push",
|
|
|
|
|
"l25_alloc_tls_page",
|
|
|
|
|
"l25_shard_steal",
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Internal Helpers
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
static void dump_stats_internal(void) {
|
|
|
|
|
fprintf(stderr, "\n========================================\n");
|
|
|
|
|
fprintf(stderr, "HAKMEM Debug Timing Statistics\n");
|
|
|
|
|
fprintf(stderr, "========================================\n");
|
|
|
|
|
|
|
|
|
|
uint64_t total_cycles = 0;
|
|
|
|
|
for (int i = 0; i < HKM_CAT_MAX; i++) {
|
|
|
|
|
total_cycles += g_global_stats[i].cycles;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < HKM_CAT_MAX; i++) {
|
|
|
|
|
HkmTimingStat* stat = &g_global_stats[i];
|
|
|
|
|
if (stat->count == 0) continue;
|
|
|
|
|
|
|
|
|
|
double avg_cycles = (double)stat->cycles / stat->count;
|
|
|
|
|
double pct = total_cycles > 0 ? (double)stat->cycles / total_cycles * 100.0 : 0.0;
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, "%-24s: count=%8lu avg=%8.0f cycles total=%12lu cycles (%.1f%%)\n",
|
|
|
|
|
g_category_names[i],
|
|
|
|
|
(unsigned long)stat->count,
|
|
|
|
|
avg_cycles,
|
|
|
|
|
(unsigned long)stat->cycles,
|
|
|
|
|
pct);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fprintf(stderr, "========================================\n");
|
|
|
|
|
fprintf(stderr, "Total cycles: %lu\n", (unsigned long)total_cycles);
|
|
|
|
|
fprintf(stderr, "========================================\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void accumulate_tls_to_global(void) {
|
|
|
|
|
// Accumulate thread-local stats to global (called at shutdown)
|
|
|
|
|
for (int i = 0; i < HKM_CAT_MAX; i++) {
|
|
|
|
|
g_global_stats[i].count += g_tls_stats[i].count;
|
|
|
|
|
g_global_stats[i].cycles += g_tls_stats[i].cycles;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void atexit_handler(void) {
|
|
|
|
|
if (!g_initialized) return;
|
|
|
|
|
|
|
|
|
|
// Accumulate TLS to global
|
|
|
|
|
accumulate_tls_to_global();
|
|
|
|
|
|
|
|
|
|
// Dump stats
|
|
|
|
|
dump_stats_internal();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Public API Implementation
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
void hkm_timing_init(void) {
|
|
|
|
|
if (g_initialized) return;
|
|
|
|
|
|
2025-11-28 01:05:18 +09:00
|
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
|
|
|
// Check HAKMEM_TIMING environment variable (gated behind !HAKMEM_BUILD_RELEASE)
|
2025-11-05 12:31:14 +09:00
|
|
|
const char* env = getenv("HAKMEM_TIMING");
|
|
|
|
|
if (env && strcmp(env, "1") == 0) {
|
|
|
|
|
g_timing_enabled = 1;
|
|
|
|
|
}
|
2025-11-28 01:05:18 +09:00
|
|
|
#else
|
|
|
|
|
g_timing_enabled = 0; // Always disabled in release builds
|
|
|
|
|
#endif
|
2025-11-05 12:31:14 +09:00
|
|
|
|
|
|
|
|
// Initialize global stats
|
|
|
|
|
memset(g_global_stats, 0, sizeof(g_global_stats));
|
|
|
|
|
|
|
|
|
|
// Initialize TLS stats
|
|
|
|
|
memset(g_tls_stats, 0, sizeof(g_tls_stats));
|
|
|
|
|
|
|
|
|
|
// Register atexit handler
|
|
|
|
|
atexit(atexit_handler);
|
|
|
|
|
|
|
|
|
|
g_initialized = 1;
|
|
|
|
|
|
|
|
|
|
if (g_timing_enabled) {
|
|
|
|
|
fprintf(stderr, "[HAKMEM Timing] Enabled (HAKMEM_TIMING=1)\n");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void hkm_timing_shutdown(void) {
|
|
|
|
|
if (!g_initialized) return;
|
|
|
|
|
|
|
|
|
|
// Accumulate TLS to global
|
|
|
|
|
accumulate_tls_to_global();
|
|
|
|
|
|
|
|
|
|
// Dump stats
|
|
|
|
|
dump_stats_internal();
|
|
|
|
|
|
|
|
|
|
g_initialized = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void hkm_count_inc(HkmTimingCategory cat) {
|
|
|
|
|
if (!g_timing_enabled) return;
|
|
|
|
|
if (cat >= HKM_CAT_MAX) return;
|
|
|
|
|
|
|
|
|
|
g_tls_stats[cat].count++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles) {
|
|
|
|
|
if (!g_timing_enabled) return;
|
|
|
|
|
if (cat >= HKM_CAT_MAX) return;
|
|
|
|
|
|
|
|
|
|
g_tls_stats[cat].cycles += cycles;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void hkm_timing_dump(void) {
|
|
|
|
|
if (!g_initialized) return;
|
|
|
|
|
|
|
|
|
|
// Accumulate TLS to global
|
|
|
|
|
accumulate_tls_to_global();
|
|
|
|
|
|
|
|
|
|
// Dump stats
|
|
|
|
|
dump_stats_internal();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// Build-time disabled: empty implementations
|
|
|
|
|
|
|
|
|
|
void hkm_timing_init(void) {}
|
|
|
|
|
void hkm_timing_shutdown(void) {}
|
|
|
|
|
void hkm_count_inc(HkmTimingCategory cat) { (void)cat; }
|
|
|
|
|
void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles) { (void)cat; (void)cycles; }
|
|
|
|
|
void hkm_timing_dump(void) {}
|
|
|
|
|
|
|
|
|
|
#endif
|