// hakmem_debug.c - Debug Timing Implementation // // License: MIT // Date: 2025-10-21 #include "hakmem_debug.h" #include "hakmem_env_cache.h" // Priority-2: ENV cache #include #include #include #if HAKMEM_DEBUG_TIMING // ============================================================================ // Global State // ============================================================================ static int g_timing_enabled = 0; // Runtime enable flag (set from HAKMEM_TIMING env) static int g_initialized = 0; // Thread-local statistics (lock-free, per-thread accumulation) static _Thread_local HkmTimingStat g_tls_stats[HKM_CAT_MAX]; // Global statistics (accumulated from all threads at shutdown) static HkmTimingStat g_global_stats[HKM_CAT_MAX]; // Category names for pretty printing (Phase 6.11.3: Updated for profiling) static const char* g_category_names[HKM_CAT_MAX] = { // Syscalls and fallbacks "syscall_mmap", "syscall_munmap", "syscall_madvise", "fallback_malloc", // Whale Fast-Path "whale_get", "whale_put", // L2.5 Pool "l25_get", "l25_put", "l25_refill", // Tiny Pool "tiny_alloc", "tiny_free", "tiny_slab_search", // BigCache "bigcache_get", "bigcache_put", "bigcache_evict_scan", // Site Rules "site_rules_lookup", "site_rules_adopt", // ELO Learning "elo_select", "elo_update", // Top-level API "hak_alloc", "hak_free", // Legacy "region_get", "region_put", "hash_fnv1a", // Mid (L2 Pool) fine-grained "pool_get", "pool_lock", "pool_refill", "tc_drain", "tls_fast", "shard_steal", // Additional Mid (L2) fine-grained (NEW) "pool_tls_ring_pop", "pool_tls_lifo_pop", "pool_remote_push", "pool_alloc_tls_page", // L2.5 (LargePool) fine-grained (NEW) "l25_lock", "l25_tls_ring_pop", "l25_tls_lifo_pop", "l25_tc_drain", "l25_remote_push", "l25_alloc_tls_page", "l25_shard_steal", }; // ============================================================================ // Internal Helpers // ============================================================================ static void dump_stats_internal(void) { fprintf(stderr, "\n========================================\n"); fprintf(stderr, "HAKMEM Debug Timing Statistics\n"); fprintf(stderr, "========================================\n"); uint64_t total_cycles = 0; for (int i = 0; i < HKM_CAT_MAX; i++) { total_cycles += g_global_stats[i].cycles; } for (int i = 0; i < HKM_CAT_MAX; i++) { HkmTimingStat* stat = &g_global_stats[i]; if (stat->count == 0) continue; double avg_cycles = (double)stat->cycles / stat->count; double pct = total_cycles > 0 ? (double)stat->cycles / total_cycles * 100.0 : 0.0; fprintf(stderr, "%-24s: count=%8lu avg=%8.0f cycles total=%12lu cycles (%.1f%%)\n", g_category_names[i], (unsigned long)stat->count, avg_cycles, (unsigned long)stat->cycles, pct); } fprintf(stderr, "========================================\n"); fprintf(stderr, "Total cycles: %lu\n", (unsigned long)total_cycles); fprintf(stderr, "========================================\n"); } static void accumulate_tls_to_global(void) { // Accumulate thread-local stats to global (called at shutdown) for (int i = 0; i < HKM_CAT_MAX; i++) { g_global_stats[i].count += g_tls_stats[i].count; g_global_stats[i].cycles += g_tls_stats[i].cycles; } } static void atexit_handler(void) { if (!g_initialized) return; // Accumulate TLS to global accumulate_tls_to_global(); // Dump stats dump_stats_internal(); } // ============================================================================ // Public API Implementation // ============================================================================ void hkm_timing_init(void) { if (g_initialized) return; #if !HAKMEM_BUILD_RELEASE // Priority-2: Use cached ENV (gated behind !HAKMEM_BUILD_RELEASE) g_timing_enabled = HAK_ENV_TIMING_ENABLED(); #else g_timing_enabled = 0; // Always disabled in release builds #endif // Initialize global stats memset(g_global_stats, 0, sizeof(g_global_stats)); // Initialize TLS stats memset(g_tls_stats, 0, sizeof(g_tls_stats)); // Register atexit handler atexit(atexit_handler); g_initialized = 1; if (g_timing_enabled) { fprintf(stderr, "[HAKMEM Timing] Enabled (HAKMEM_TIMING=1)\n"); } } void hkm_timing_shutdown(void) { if (!g_initialized) return; // Accumulate TLS to global accumulate_tls_to_global(); // Dump stats dump_stats_internal(); g_initialized = 0; } void hkm_count_inc(HkmTimingCategory cat) { if (!g_timing_enabled) return; if (cat >= HKM_CAT_MAX) return; g_tls_stats[cat].count++; } void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles) { if (!g_timing_enabled) return; if (cat >= HKM_CAT_MAX) return; g_tls_stats[cat].cycles += cycles; } void hkm_timing_dump(void) { if (!g_initialized) return; // Accumulate TLS to global accumulate_tls_to_global(); // Dump stats dump_stats_internal(); } #else // Build-time disabled: empty implementations void hkm_timing_init(void) {} void hkm_timing_shutdown(void) {} void hkm_count_inc(HkmTimingCategory cat) { (void)cat; } void hkm_cycles_add(HkmTimingCategory cat, uint64_t cycles) { (void)cat; (void)cycles; } void hkm_timing_dump(void) {} #endif