## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K | 3.06 M ops/s | 3.17 M ops/s | +3.65% |
| 1M | 23.71 M ops/s | 27.34 M ops/s | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
298 lines
16 KiB
C
298 lines
16 KiB
C
// tiny_destructors.c — Tiny の終了処理と統計ダンプを箱化
|
|
#include "tiny_destructors.h"
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "box/tiny_hotheap_v2_box.h"
|
|
#include "box/tiny_front_stats_box.h"
|
|
#include "box/tiny_heap_box.h"
|
|
#include "box/tiny_route_env_box.h"
|
|
#include "box/tls_sll_box.h"
|
|
#include "front/tiny_heap_v2.h"
|
|
#include "hakmem_env_cache.h"
|
|
#include "hakmem_tiny_magazine.h"
|
|
#include "hakmem_tiny_stats_api.h"
|
|
|
|
// Exit-time switches, all disabled by default. They are written by
// tiny_destructors_configure_from_env() and read by
// tiny_destructors_register_exit() and hak_flush_tiny_exit().
static int g_flush_on_exit = 0;        // from HAKMEM_TINY_FLUSH_ON_EXIT
|
|
static int g_ultra_debug_on_exit = 0;  // from HAKMEM_TINY_ULTRA_DEBUG
|
|
static int g_path_debug_on_exit = 0;   // from HAKMEM_TINY_PATH_DEBUG
|
|
|
|
// HotHeap v2 stats storage (defined in hakmem_tiny.c).
// Per-class atomic counters; this file only reads them (relaxed loads) in
// tiny_hotheap_v2_stats_dump().
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES];
|
|
extern _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES];
|
|
// Per-class page-level counters, also read by tiny_hotheap_v2_stats_dump().
extern TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES];
|
|
|
|
// Per-class counter printed by tiny_alloc_1024_diag_atexit().
extern _Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES];
|
|
// Per-class TLS SLL anomaly counters printed by tiny_tls_sll_diag_atexit().
extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
|
|
extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
|
|
|
|
static void hak_flush_tiny_exit(void) {
|
|
if (g_flush_on_exit) {
|
|
hak_tiny_magazine_flush_all();
|
|
hak_tiny_trim();
|
|
}
|
|
if (g_ultra_debug_on_exit) {
|
|
hak_tiny_ultra_debug_dump();
|
|
}
|
|
// Path debug dump (optional): HAKMEM_TINY_PATH_DEBUG=1
|
|
hak_tiny_path_debug_dump();
|
|
// Extended counters (optional): HAKMEM_TINY_COUNTERS_DUMP=1
|
|
hak_tiny_debug_counters_dump();
|
|
|
|
// DEBUG: Print SuperSlab accounting stats
|
|
extern _Atomic uint64_t g_ss_active_dec_calls;
|
|
extern _Atomic uint64_t g_hak_tiny_free_calls;
|
|
extern _Atomic uint64_t g_ss_remote_push_calls;
|
|
extern _Atomic uint64_t g_free_ss_enter;
|
|
extern _Atomic uint64_t g_free_local_box_calls;
|
|
extern _Atomic uint64_t g_free_remote_box_calls;
|
|
extern uint64_t g_superslabs_allocated;
|
|
extern uint64_t g_superslabs_freed;
|
|
|
|
fprintf(stderr, "\n[EXIT DEBUG] SuperSlab Accounting:\n");
|
|
fprintf(stderr, " g_superslabs_allocated = %llu\n", (unsigned long long)g_superslabs_allocated);
|
|
fprintf(stderr, " g_superslabs_freed = %llu\n", (unsigned long long)g_superslabs_freed);
|
|
fprintf(stderr, " g_hak_tiny_free_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_hak_tiny_free_calls, memory_order_relaxed));
|
|
fprintf(stderr, " g_ss_remote_push_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_ss_remote_push_calls, memory_order_relaxed));
|
|
fprintf(stderr, " g_ss_active_dec_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_ss_active_dec_calls, memory_order_relaxed));
|
|
extern _Atomic uint64_t g_free_wrapper_calls;
|
|
fprintf(stderr, " g_free_wrapper_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_free_wrapper_calls, memory_order_relaxed));
|
|
fprintf(stderr, " g_free_ss_enter = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_free_ss_enter, memory_order_relaxed));
|
|
fprintf(stderr, " g_free_local_box_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_free_local_box_calls, memory_order_relaxed));
|
|
fprintf(stderr, " g_free_remote_box_calls = %llu\n",
|
|
(unsigned long long)atomic_load_explicit(&g_free_remote_box_calls, memory_order_relaxed));
|
|
}
|
|
|
|
void tiny_destructors_configure_from_env(void) {
|
|
const char* tf = getenv("HAKMEM_TINY_FLUSH_ON_EXIT");
|
|
if (tf && atoi(tf) != 0) {
|
|
g_flush_on_exit = 1;
|
|
}
|
|
const char* ud = getenv("HAKMEM_TINY_ULTRA_DEBUG");
|
|
if (ud && atoi(ud) != 0) {
|
|
g_ultra_debug_on_exit = 1;
|
|
}
|
|
const char* pd = getenv("HAKMEM_TINY_PATH_DEBUG");
|
|
if (pd) {
|
|
g_path_debug_on_exit = 1;
|
|
}
|
|
}
|
|
|
|
void tiny_destructors_register_exit(void) {
|
|
if (g_flush_on_exit || g_ultra_debug_on_exit || g_path_debug_on_exit) {
|
|
atexit(hak_flush_tiny_exit);
|
|
}
|
|
}
|
|
|
|
// Reports whether the heap-stats dump is requested through
// HAKMEM_TINY_HEAP_STATS_DUMP or HAKMEM_TINY_C7_HEAP_STATS_DUMP. A variable
// counts as "on" when it is set, non-empty, and its first character is not '0'.
// The environment is consulted on the first call only; the result is cached
// in a function-local static afterwards.
static int tiny_heap_stats_dump_enabled(void) {
    static int cached = -1;

    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }

    const char *generic = getenv("HAKMEM_TINY_HEAP_STATS_DUMP");
    const char *c7_only = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");

    const int generic_on = generic && generic[0] != '\0' && generic[0] != '0';
    const int c7_on = c7_only && c7_only[0] != '\0' && c7_only[0] != '0';

    cached = (generic_on || c7_on) ? 1 : 0;
    return cached;
}
|
|
|
|
__attribute__((destructor))
|
|
static void tiny_heap_stats_dump(void) {
|
|
if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
|
|
return;
|
|
}
|
|
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
|
TinyHeapClassStats snap = {
|
|
.alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed),
|
|
.alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed),
|
|
.free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed),
|
|
.free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed),
|
|
.alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed),
|
|
.alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed),
|
|
};
|
|
if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 &&
|
|
snap.free_fast_local == 0 && snap.free_slow_fallback == 0 &&
|
|
snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) {
|
|
continue;
|
|
}
|
|
fprintf(stderr,
|
|
"[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
|
|
cls,
|
|
(unsigned long long)snap.alloc_fast_current,
|
|
(unsigned long long)snap.alloc_slow_prepare,
|
|
(unsigned long long)snap.free_fast_local,
|
|
(unsigned long long)snap.free_slow_fallback,
|
|
(unsigned long long)snap.alloc_prepare_fail,
|
|
(unsigned long long)snap.alloc_fail);
|
|
}
|
|
TinyC7PageStats ps = {
|
|
.prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed),
|
|
.prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed),
|
|
.prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed),
|
|
.current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed),
|
|
.current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed),
|
|
};
|
|
if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
|
|
ps.current_set_from_free || ps.current_dropped_to_partial) {
|
|
fprintf(stderr,
|
|
"[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n",
|
|
(unsigned long long)ps.prepare_calls,
|
|
(unsigned long long)ps.prepare_with_current_null,
|
|
(unsigned long long)ps.prepare_from_partial,
|
|
(unsigned long long)ps.current_set_from_free,
|
|
(unsigned long long)ps.current_dropped_to_partial);
|
|
fflush(stderr);
|
|
}
|
|
}
|
|
|
|
__attribute__((destructor))
|
|
static void tiny_front_class_stats_dump(void) {
|
|
if (!tiny_front_class_stats_dump_enabled()) {
|
|
return;
|
|
}
|
|
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
|
uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed);
|
|
uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed);
|
|
if (a == 0 && f == 0) {
|
|
continue;
|
|
}
|
|
fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n",
|
|
cls, (unsigned long long)a, (unsigned long long)f);
|
|
}
|
|
}
|
|
|
|
// Destructor: dumps heap delta debug information for class 7 and class 6.
// Each dump runs only when that class's meta-light check and its delta-debug
// check both return true; the delta-debug check is not evaluated when the
// meta-light check is false.
__attribute__((destructor))
static void tiny_c7_delta_debug_destructor(void) {
    if (tiny_c7_meta_light_enabled()) {
        if (tiny_c7_delta_debug_enabled()) {
            tiny_c7_heap_debug_dump_deltas();
        }
    }

    if (tiny_heap_meta_light_enabled_for_class(6)) {
        if (tiny_c6_delta_debug_enabled()) {
            tiny_c6_heap_debug_dump_deltas();
        }
    }
}
|
|
|
|
__attribute__((destructor))
|
|
static void tiny_hotheap_v2_stats_dump(void) {
|
|
if (!tiny_hotheap_v2_stats_enabled()) {
|
|
return;
|
|
}
|
|
for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) {
|
|
uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
|
|
uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
|
|
uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
|
|
uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
|
|
uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
|
|
uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
|
|
uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
|
|
uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
|
|
uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
|
|
uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
|
|
uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
|
|
uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
|
|
uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
|
|
uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
|
|
uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
|
|
uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
|
|
|
|
TinyHotHeapV2PageStats ps = {
|
|
.prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed),
|
|
.prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed),
|
|
.prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed),
|
|
.free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed),
|
|
.page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed),
|
|
};
|
|
|
|
if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
|
|
ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
|
|
ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) {
|
|
continue;
|
|
}
|
|
|
|
tiny_route_kind_t route_kind = tiny_route_for_class(ci);
|
|
fprintf(stderr,
|
|
"[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n",
|
|
(unsigned)ci,
|
|
(int)route_kind,
|
|
(unsigned long long)route_hits,
|
|
(unsigned long long)alloc_calls,
|
|
(unsigned long long)alloc_fast,
|
|
(unsigned long long)alloc_lease,
|
|
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed),
|
|
(unsigned long long)refill_with_cur,
|
|
(unsigned long long)refill_with_partial,
|
|
(unsigned long long)alloc_fb,
|
|
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed),
|
|
(unsigned long long)cold_refill_fail,
|
|
(unsigned long long)cold_retire_calls,
|
|
(unsigned long long)retire_calls_v2,
|
|
(unsigned long long)free_calls,
|
|
(unsigned long long)free_fast,
|
|
(unsigned long long)free_fb,
|
|
(unsigned long long)ps.prepare_calls,
|
|
(unsigned long long)ps.prepare_with_current_null,
|
|
(unsigned long long)ps.prepare_from_partial,
|
|
(unsigned long long)ps.free_made_current,
|
|
(unsigned long long)ps.page_retired,
|
|
(unsigned long long)partial_pushes,
|
|
(unsigned long long)partial_pops,
|
|
(unsigned long long)partial_peak);
|
|
}
|
|
}
|
|
|
|
// Destructor hook: forwards to tiny_heap_v2_print_stats().
__attribute__((destructor))
static void tiny_heap_v2_stats_atexit(void) {
    tiny_heap_v2_print_stats();
}
|
|
|
|
static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
|
|
static void tiny_alloc_1024_diag_atexit(void) {
|
|
// Priority-2: Use cached ENV
|
|
if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
|
|
fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
|
|
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
|
uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
|
|
if (v) {
|
|
fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
|
|
}
|
|
}
|
|
fprintf(stderr, "\n");
|
|
}
|
|
|
|
static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
|
|
static void tiny_tls_sll_diag_atexit(void) {
|
|
#if !HAKMEM_BUILD_RELEASE
|
|
// Priority-2: Use cached ENV
|
|
if (!HAK_ENV_TINY_SLL_DIAG()) return;
|
|
fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
|
|
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
|
uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
|
|
uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
|
|
if (ih || ip) {
|
|
fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
|
|
cls, (unsigned long long)ih, (unsigned long long)ip);
|
|
}
|
|
}
|
|
#endif
|
|
}
|