// tiny_destructors.c — Tiny の終了処理と統計ダンプを箱化 #include "tiny_destructors.h" #include #include #include "box/tiny_hotheap_v2_box.h" #include "box/tiny_front_stats_box.h" #include "box/tiny_heap_box.h" #include "box/tiny_route_env_box.h" #include "box/tls_sll_box.h" #include "front/tiny_heap_v2.h" #include "hakmem_env_cache.h" #include "hakmem_tiny_magazine.h" #include "hakmem_tiny_stats_api.h" static int g_flush_on_exit = 0; static int g_ultra_debug_on_exit = 0; static int g_path_debug_on_exit = 0; // HotHeap v2 stats storage (defined in hakmem_tiny.c) extern _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES]; extern TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES]; extern _Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES]; extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES]; extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES]; static void hak_flush_tiny_exit(void) { if (g_flush_on_exit) { hak_tiny_magazine_flush_all(); hak_tiny_trim(); } if (g_ultra_debug_on_exit) { hak_tiny_ultra_debug_dump(); } // Path debug dump (optional): HAKMEM_TINY_PATH_DEBUG=1 hak_tiny_path_debug_dump(); // Extended counters (optional): HAKMEM_TINY_COUNTERS_DUMP=1 hak_tiny_debug_counters_dump(); // DEBUG: Print SuperSlab accounting stats extern _Atomic uint64_t g_ss_active_dec_calls; extern _Atomic uint64_t g_hak_tiny_free_calls; extern _Atomic uint64_t g_ss_remote_push_calls; extern _Atomic uint64_t g_free_ss_enter; extern _Atomic uint64_t g_free_local_box_calls; extern _Atomic uint64_t g_free_remote_box_calls; extern uint64_t g_superslabs_allocated; extern uint64_t g_superslabs_freed; fprintf(stderr, "\n[EXIT DEBUG] SuperSlab Accounting:\n"); fprintf(stderr, " g_superslabs_allocated = %llu\n", (unsigned long long)g_superslabs_allocated); fprintf(stderr, " g_superslabs_freed = %llu\n", (unsigned long long)g_superslabs_freed); fprintf(stderr, " g_hak_tiny_free_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_hak_tiny_free_calls, memory_order_relaxed)); fprintf(stderr, " g_ss_remote_push_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_ss_remote_push_calls, memory_order_relaxed)); fprintf(stderr, " g_ss_active_dec_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_ss_active_dec_calls, memory_order_relaxed)); extern _Atomic uint64_t g_free_wrapper_calls; fprintf(stderr, " g_free_wrapper_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_free_wrapper_calls, memory_order_relaxed)); fprintf(stderr, " g_free_ss_enter = %llu\n", (unsigned long long)atomic_load_explicit(&g_free_ss_enter, memory_order_relaxed)); fprintf(stderr, " g_free_local_box_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_free_local_box_calls, memory_order_relaxed)); fprintf(stderr, " g_free_remote_box_calls = %llu\n", (unsigned long long)atomic_load_explicit(&g_free_remote_box_calls, memory_order_relaxed)); } void tiny_destructors_configure_from_env(void) { const char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT"); if (tf && atoi(tf) != 0) { g_flush_on_exit = 1; } const char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG"); if (ud && atoi(ud) != 0) { g_ultra_debug_on_exit = 1; } const char* pd = getenv("HAKMEM_TINY_PATH_DEBUG"); if (pd) { g_path_debug_on_exit = 1; } } void tiny_destructors_register_exit(void) { if (g_flush_on_exit || g_ultra_debug_on_exit || g_path_debug_on_exit) { atexit(hak_flush_tiny_exit); } } static int tiny_heap_stats_dump_enabled(void) { static int g = -1; if (__builtin_expect(g == -1, 0)) { const char* eh = getenv("HAKMEM_TINY_HEAP_STATS_DUMP"); const char* e = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP"); g = ((eh && *eh && *eh != '0') || (e && *e && *e != '0')) ? 1 : 0; } return g; } __attribute__((destructor)) static void tiny_heap_stats_dump(void) { if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) { return; } for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) { TinyHeapClassStats snap = { .alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed), .alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed), .free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed), .free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed), .alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed), .alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed), }; if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 && snap.free_fast_local == 0 && snap.free_slow_fallback == 0 && snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) { continue; } fprintf(stderr, "[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n", cls, (unsigned long long)snap.alloc_fast_current, (unsigned long long)snap.alloc_slow_prepare, (unsigned long long)snap.free_fast_local, (unsigned long long)snap.free_slow_fallback, (unsigned long long)snap.alloc_prepare_fail, (unsigned long long)snap.alloc_fail); } TinyC7PageStats ps = { .prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed), .prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed), .prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed), .current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed), .current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed), }; if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial || ps.current_set_from_free || ps.current_dropped_to_partial) { fprintf(stderr, "[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n", (unsigned long long)ps.prepare_calls, (unsigned long long)ps.prepare_with_current_null, (unsigned long long)ps.prepare_from_partial, (unsigned long long)ps.current_set_from_free, (unsigned long long)ps.current_dropped_to_partial); fflush(stderr); } } __attribute__((destructor)) static void tiny_front_class_stats_dump(void) { if (!tiny_front_class_stats_dump_enabled()) { return; } for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) { uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed); uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed); if (a == 0 && f == 0) { continue; } fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n", cls, (unsigned long long)a, (unsigned long long)f); } } __attribute__((destructor)) static void tiny_c7_delta_debug_destructor(void) { if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) { tiny_c7_heap_debug_dump_deltas(); } if (tiny_heap_meta_light_enabled_for_class(6) && tiny_c6_delta_debug_enabled()) { tiny_c6_heap_debug_dump_deltas(); } } __attribute__((destructor)) static void tiny_hotheap_v2_stats_dump(void) { if (!tiny_hotheap_v2_stats_enabled()) { return; } for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) { uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed); uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed); uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed); uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed); uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed); uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed); uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed); uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed); uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed); uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed); uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed); uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed); uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed); uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed); uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed); uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed); TinyHotHeapV2PageStats ps = { .prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed), .prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed), .prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed), .free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed), .page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed), }; if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb || ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial || ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) { continue; } tiny_route_kind_t route_kind = tiny_route_for_class(ci); fprintf(stderr, "[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n", (unsigned)ci, (int)route_kind, (unsigned long long)route_hits, (unsigned long long)alloc_calls, (unsigned long long)alloc_fast, (unsigned long long)alloc_lease, (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed), (unsigned long long)refill_with_cur, (unsigned long long)refill_with_partial, (unsigned long long)alloc_fb, (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed), (unsigned long long)cold_refill_fail, (unsigned long long)cold_retire_calls, (unsigned long long)retire_calls_v2, (unsigned long long)free_calls, (unsigned long long)free_fast, (unsigned long long)free_fb, (unsigned long long)ps.prepare_calls, (unsigned long long)ps.prepare_with_current_null, (unsigned long long)ps.prepare_from_partial, (unsigned long long)ps.free_made_current, (unsigned long long)ps.page_retired, (unsigned long long)partial_pushes, (unsigned long long)partial_pops, (unsigned long long)partial_peak); } } static void tiny_heap_v2_stats_atexit(void) __attribute__((destructor)); static void tiny_heap_v2_stats_atexit(void) { tiny_heap_v2_print_stats(); } static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor)); static void tiny_alloc_1024_diag_atexit(void) { // Priority-2: Use cached ENV if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return; fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n"); for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) { uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed); if (v) { fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v); } } fprintf(stderr, "\n"); } static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor)); static void tiny_tls_sll_diag_atexit(void) { #if !HAKMEM_BUILD_RELEASE // Priority-2: Use cached ENV if (!HAK_ENV_TINY_SLL_DIAG()) return; fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n"); for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) { uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed); uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed); if (ih || ip) { fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n", cls, (unsigned long long)ih, (unsigned long long)ip); } } #endif }