/*
 * hakmem/core/hakmem_tiny_stats.c
 * (repository viewer metadata: 744 lines, 34 KiB, C)
 */

// hakmem_tiny_stats.c
// Phase 2, Module 1: Statistics and Debug Functions
// Extracted from hakmem_tiny.c (lines 4348-4728, non-contiguous)
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>
#include <unistd.h>
#include "hakmem_tiny.h"
#include "hakmem_tiny_config.h" // extern g_tiny_class_sizes
#include "hakmem_tiny_stats_api.h"
// Cached state of the HAKMEM_TINY_DUMP_ATEXIT_ONLY environment switch.
// -1 means "not read yet"; hak_tiny_stats_init_flags() replaces it with 0 or 1
// the first time it runs.
static int g_dump_atexit_only = -1; // env: HAKMEM_TINY_DUMP_ATEXIT_ONLY=1
// Forward declaration of local dump (defined later in this file)
static void hak_tiny_refill_counters_dump(void);
// Resolves g_dump_atexit_only from the environment the first time it is called.
// While the flag still holds its -1 sentinel, HAKMEM_TINY_DUMP_ATEXIT_ONLY is
// read and the flag becomes 1 when the variable parses (atoi) to a non-zero
// number, otherwise 0. Later calls return immediately.
static void hak_tiny_stats_init_flags(void) {
    if (g_dump_atexit_only != -1) {
        return; // already resolved by an earlier call
    }

    const char* raw = getenv("HAKMEM_TINY_DUMP_ATEXIT_ONLY");
    int enabled = 0;
    if (raw != NULL && atoi(raw) != 0) {
        enabled = 1;
    }
    g_dump_atexit_only = enabled;
}
// Runs every counter dump this file provides, right now, on the calling thread.
// The refill-stage dump is emitted first and the extended debug dump second.
// Each callee checks its own environment-variable gate before printing, and the
// extended dump has a body only when HAKMEM_DEBUG_COUNTERS is enabled.
void hak_tiny_dump_all_counters_now(void) {
// Dump both minimal and extended (if compiled)
hak_tiny_refill_counters_dump();
hak_tiny_debug_counters_dump();
}
// Set to 1 by the SIGUSR1 handler to request a dump; cleared by
// hak_tiny_stats_handle_signal() when it picks the request up.
_Atomic int g_tiny_sukesuke_pending = 0;
// Non-zero while a thread is inside the dump started by
// hak_tiny_stats_handle_signal(); used there to keep dumps from overlapping.
_Atomic int g_tiny_sukesuke_dumping = 0;

// Signal handler installed for SIGUSR1 by hak_tiny_enable_signal_dump().
//
// It only records that a dump was requested and writes a short notice to
// stderr with write(); the dump itself is performed later by
// hak_tiny_stats_handle_signal(), outside signal context.
//
// signo: delivered signal number (unused).
static void hak_tiny_sig_handler(int signo) {
    (void)signo;

    // write() may overwrite errno. The handler can interrupt code that is about
    // to inspect errno, so preserve the value across the handler.
    const int saved_errno = errno;

    atomic_store_explicit(&g_tiny_sukesuke_pending, 1, memory_order_release);

    static const char msg[] = "[SUKESUKE] dump requested\n";
    ssize_t written = write(STDERR_FILENO, msg, sizeof(msg) - 1);
    (void)written; // best-effort notice; nothing useful to do on failure here

    errno = saved_errno;
}
// Services a dump request previously recorded in g_tiny_sukesuke_pending.
//
// Returns at once when no request is pending. Otherwise the request is claimed
// and, unless g_tiny_sukesuke_dumping shows a dump already in progress, all
// counters are dumped via hak_tiny_dump_all_counters_now(). If a dump is
// already in progress the request is put back so a later call can serve it.
void hak_tiny_stats_handle_signal(void) {
    // Atomically take ownership of the request (if there is one).
    if (atomic_exchange_explicit(&g_tiny_sukesuke_pending, 0, memory_order_acq_rel) == 0) {
        return;
    }

    const int already_dumping =
        atomic_exchange_explicit(&g_tiny_sukesuke_dumping, 1, memory_order_acq_rel);
    if (already_dumping != 0) {
        // Somebody else holds the dump guard: re-arm the request and leave.
        atomic_store_explicit(&g_tiny_sukesuke_pending, 1, memory_order_release);
        return;
    }

    hak_tiny_dump_all_counters_now();

    // Release the dump guard.
    atomic_store_explicit(&g_tiny_sukesuke_dumping, 0, memory_order_release);
}
// Installs hak_tiny_sig_handler for SIGUSR1 when the environment variable
// HAKMEM_TINY_SUKESUKE parses (atoi) to a non-zero value; otherwise does nothing.
//
// On success a confirmation line is printed to stderr. If sigaction() fails the
// error is reported with perror() and no confirmation is printed.
void hak_tiny_enable_signal_dump(void) {
    const char* s = getenv("HAKMEM_TINY_SUKESUKE");
    if (!(s && atoi(s) != 0)) return;

    // Zero the whole struct first so no member is passed to the kernel
    // uninitialised, then fill in the members we care about.
    struct sigaction sa;
    memset(&sa, 0, sizeof sa);
    sa.sa_handler = hak_tiny_sig_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART; // restart interrupted syscalls instead of failing with EINTR

    if (sigaction(SIGUSR1, &sa, NULL) != 0) {
        perror("[SUKESUKE] sigaction(SIGUSR1) failed");
        return;
    }
    fprintf(stderr, "[SUKESUKE] SIGUSR1 dump enabled\n");
}
// Load-time hook (constructor attribute): installs the SIGUSR1 dump handler
// early, before tiny init, when HAKMEM_TINY_SUKESUKE is set to a non-zero number.
__attribute__((constructor))
static void hak_tiny_signal_dump_ctor(void) {
    const char* flag = getenv("HAKMEM_TINY_SUKESUKE");
    if (flag == NULL || atoi(flag) == 0) {
        return; // feature not requested
    }
    hak_tiny_enable_signal_dump();
}
#include "hakmem_tiny_superslab.h"
#include "hakmem_config.h"
#include "hakmem_tiny_stats.h"
// ============================================================================
// Phase 8.1: Public Statistics API (lines 4348-4415)
// ============================================================================
// Copies the per-class counters of g_tiny_pool into caller-provided arrays.
//
// alloc_count / free_count / slab_count: destination arrays; any of them may be
//   NULL to skip that counter. Each non-NULL array is written at indices
//   0 .. TINY_NUM_CLASSES-1, so it must have room for that many entries.
//
// Nothing is written when the tiny pool has not been initialised. When
// HAKMEM_ENABLE_STATS is defined, stats_flush_all() runs first so batched
// per-thread counts are included.
void hak_tiny_get_stats(uint64_t* alloc_count, uint64_t* free_count, uint64_t* slab_count) {
    if (!g_tiny_initialized) {
        return;
    }

#ifdef HAKMEM_ENABLE_STATS
    // Flush TLS batches to global counters for accurate stats
    stats_flush_all();
#endif

    for (int cls = 0; cls < TINY_NUM_CLASSES; ++cls) {
        if (alloc_count != NULL) {
            alloc_count[cls] = g_tiny_pool.alloc_count[cls];
        }
        if (free_count != NULL) {
            free_count[cls] = g_tiny_pool.free_count[cls];
        }
        if (slab_count != NULL) {
            slab_count[cls] = g_tiny_pool.slab_count[cls];
        }
    }
}
// Prints a human-readable summary of the tiny pool to stdout.
//
// Always prints one row per size class with its block size and the
// alloc/free/slab counters from g_tiny_pool. When HAKMEM_BUILD_DEBUG is
// non-zero it additionally prints the TLS hit/miss/spill counters and, if
// HAKMEM_TINY_OBS_ENABLE is defined, the observation snapshot table.
// Prints a single notice and returns if the pool is not initialised.
void hak_tiny_print_stats(void) {
    if (!g_tiny_initialized) {
        printf("Tiny Pool not initialized\n");
        return;
    }
    printf("\n");
    printf("Tiny Pool Statistics\n");
    printf("========================================\n");
    printf("Class | Size | Allocs | Frees | Slabs\n");
    printf("------|--------|---------|---------|-------\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        // Counters are printed as unsigned long long: a cast to unsigned long
        // would drop the upper half of a 64-bit counter where long is 32 bits.
        // The size cast keeps %zu valid whatever integer type the size table uses.
        printf(" %d | %4zuB | %7llu | %7llu | %5llu\n",
               i,
               (size_t)g_tiny_class_sizes[i],
               (unsigned long long)g_tiny_pool.alloc_count[i],
               (unsigned long long)g_tiny_pool.free_count[i],
               (unsigned long long)g_tiny_pool.slab_count[i]);
    }
    printf("========================================\n");
    printf("\n");
#if HAKMEM_BUILD_DEBUG
    printf("TLS Debug Counters (hit/miss/spill) per class\n");
    printf("---------------------------------------------\n");
    printf("Class | Hit | Miss | SpillSS | SpillOwner | SpillMag | SpillReq\n");
    printf("------+-----------+-----------+-----------+-----------+-----------+-----------\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        printf(" %d | %9llu | %9llu | %9llu | %9llu | %9llu | %9llu\n",
               i,
               (unsigned long long)g_tls_hit_count[i],
               (unsigned long long)g_tls_miss_count[i],
               (unsigned long long)g_tls_spill_ss_count[i],
               (unsigned long long)g_tls_spill_owner_count[i],
               (unsigned long long)g_tls_spill_mag_count[i],
               (unsigned long long)g_tls_spill_requeue_count[i]);
    }
    printf("---------------------------------------------\n\n");
    // Observation snapshot (disabled unless Tiny obs is explicitly enabled)
#ifdef HAKMEM_TINY_OBS_ENABLE
    extern unsigned long long g_obs_epoch;
    extern unsigned int g_obs_interval;
    // NOTE(review): this local typedef must match the layout of the real
    // g_obs_stats element type defined elsewhere -- confirm against its owner.
    typedef struct {
        unsigned long long hit, miss, spill_ss, spill_owner, spill_mag, spill_requeue;
    } TinyObsStats;
    extern TinyObsStats g_obs_stats[TINY_NUM_CLASSES];
    printf("Observation Snapshot (epoch %llu, interval %u events)\n",
           (unsigned long long)g_obs_epoch,
           g_obs_interval);
    printf("Class | dHit | dMiss | dSpSS | dSpOwn | dSpMag | dSpReq\n");
    printf("------+-----------+-----------+-----------+-----------+-----------+-----------\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        TinyObsStats* st = &g_obs_stats[i];
        printf(" %d | %9llu | %9llu | %9llu | %9llu | %9llu | %9llu\n",
               i,
               (unsigned long long)st->hit,
               (unsigned long long)st->miss,
               (unsigned long long)st->spill_ss,
               (unsigned long long)st->spill_owner,
               (unsigned long long)st->spill_mag,
               (unsigned long long)st->spill_requeue);
    }
    printf("---------------------------------------------\n\n");
#else
    printf("Observation Snapshot: disabled (build-time)\n\n");
#endif
#endif
}
// ============================================================================
// Phase 8.2: Memory Profiling Debug (toggle with HAKMEM_DEBUG_MEMORY)
// ============================================================================
// NOTE: with HAKMEM_DEBUG_MEMORY defined, count_active_superslabs and
// hak_tiny_print_memory_profile are currently disabled (nothing is defined here).
#ifndef HAKMEM_DEBUG_MEMORY
// Placeholder used when memory-profiling debug is not compiled in: keeps the
// symbol available to callers while doing nothing.
void hak_tiny_print_memory_profile(void)
{
    /* intentionally empty */
}
#endif // !HAKMEM_DEBUG_MEMORY
// ============================================================================
// Debug Print Functions (always available, gated by HAKMEM_DEBUG_COUNTERS)
// ============================================================================
// Debug print for Ultra Tiny counters
// Dump hook for the Ultra Tiny counters.
//
// Currently a no-op in every build configuration: the Ultra Tiny counters
// (pop_hits, refills, resets, sll_count) are not tracked yet. The intended
// output is kept below, compiled out, as a template; enable it (for
// HAKMEM_DEBUG_COUNTERS builds) once those variables are implemented.
void hak_tiny_ultra_debug_dump(void) {
#if 0
    fprintf(stderr, "\n[Ultra Tiny Debug]\n");
    fprintf(stderr, "class, pop_hits, refills, resets, sll_count\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%u\n",
                i,
                (unsigned long long)g_ultra_pop_hits[i],
                (unsigned long long)g_ultra_refill_calls[i],
                (unsigned long long)g_ultra_resets[i],
                (unsigned)g_tls_sll[i].count);
    }
#endif
}
// Debug print for normal path counters (SLL/MAG/FRONT/SUPER)
// Dump hook for the normal-path counters (SLL/MAG/FRONT/SUPER).
//
// With HAKMEM_DEBUG_COUNTERS enabled it consults HAKMEM_TINY_PATH_DEBUG and
// returns unless that variable parses (atoi) to a non-zero value; even then
// nothing is printed yet because the path counters are not tracked. Without
// HAKMEM_DEBUG_COUNTERS the function is empty.
void hak_tiny_path_debug_dump(void) {
#if HAKMEM_DEBUG_COUNTERS
    const char* enabled = getenv("HAKMEM_TINY_PATH_DEBUG");
    if (enabled == NULL || atoi(enabled) == 0) {
        return;
    }
    // NOTE: Path debug counters (sll_pop, mag_pop, etc.) are currently not tracked
    // Uncomment when these variables are implemented
    /*
    fprintf(stderr, "\n[Tiny Path Debug]\n");
    fprintf(stderr, "class, sll_pop, mag_pop, front_pop, superslab, refills\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu\n",
                i,
                (unsigned long long)g_path_sll_pop[i],
                (unsigned long long)g_path_mag_pop[i],
                (unsigned long long)g_path_front_pop[i],
                (unsigned long long)g_path_superslab[i],
                (unsigned long long)g_path_refill_calls[i]);
    }
    */
    (void)enabled;
#else
    (void)getenv; // suppress unused warnings when compiled out
#endif
}
// Debug print for extended counters (slow/bin/bump/spec)
// Dumps the extended per-class debug counters to stderr as comma-separated
// tables, one "[Section]" heading plus a column-name line per table.
//
// The body exists only when HAKMEM_DEBUG_COUNTERS is non-zero; otherwise the
// function is empty. At run time nothing is printed unless the environment
// variable HAKMEM_TINY_COUNTERS_DUMP parses (atoi) to a non-zero value.
// All counters are defined in other translation units and are only read here.
void hak_tiny_debug_counters_dump(void) {
#if HAKMEM_DEBUG_COUNTERS
    const char* on = getenv("HAKMEM_TINY_COUNTERS_DUMP");
    if (!(on && atoi(on) != 0)) return;

    // NOTE: Extended counters (alloc_slow, bitmap_scans, etc.) are currently not tracked
    // Uncomment when these variables are implemented
    /*
    fprintf(stderr, "\n[Tiny Extended Counters]\n");
    fprintf(stderr, "class, alloc_slow, ss_refill, bitmap_scans, bin_pops, bump_hits, bump_arms, spec_calls, spec_hits\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n",
                i,
                (unsigned long long)g_alloc_slow_calls[i],
                (unsigned long long)g_superslab_refill_calls_dbg[i],
                (unsigned long long)g_bitmap_scan_calls[i],
                (unsigned long long)g_bgbin_pops[i],
                (unsigned long long)g_bump_hits[i],
                (unsigned long long)g_bump_arms[i],
                (unsigned long long)g_spec_calls[i],
                (unsigned long long)g_spec_hits[i]);
    }
    */

    // SuperSlab adopt/publish debug
    extern unsigned long long g_ss_publish_dbg[];
    extern unsigned long long g_ss_adopt_dbg[];
    fprintf(stderr, "\n[SS Adopt/Publish Counters]\n");
    fprintf(stderr, "class, ss_publish, ss_adopt\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu\n", i,
                (unsigned long long)g_ss_publish_dbg[i],
                (unsigned long long)g_ss_adopt_dbg[i]);
    }

    // Refill-stage counters
    extern unsigned long long g_rf_total_calls[];
    extern unsigned long long g_rf_hit_bench[];
    extern unsigned long long g_rf_hit_hot[];
    extern unsigned long long g_rf_hit_ready[];
    extern unsigned long long g_rf_hit_slab[];
    extern unsigned long long g_rf_hit_ss[];
    extern unsigned long long g_rf_hit_reg[];
    extern unsigned long long g_rf_mmap_calls[];
    fprintf(stderr, "\n[Refill Stage Counters]\n");
    fprintf(stderr, "class, total, ready, bench, hot, slab, ss, reg, mmap\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_rf_total_calls[i],
                (unsigned long long)g_rf_hit_ready[i],
                (unsigned long long)g_rf_hit_bench[i],
                (unsigned long long)g_rf_hit_hot[i],
                (unsigned long long)g_rf_hit_slab[i],
                (unsigned long long)g_rf_hit_ss[i],
                (unsigned long long)g_rf_hit_reg[i],
                (unsigned long long)g_rf_mmap_calls[i]);
    }

    // Refill item sources (freelist vs carve)
    extern unsigned long long g_rf_freelist_items[];
    extern unsigned long long g_rf_carve_items[];
    fprintf(stderr, "\n[Refill Item Sources]\n");
    fprintf(stderr, "class, freelist_items, carve_items\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu\n", i,
                (unsigned long long)g_rf_freelist_items[i],
                (unsigned long long)g_rf_carve_items[i]);
    }

    // Diagnostic: refill early return counters
    extern unsigned long long g_rf_early_no_ss[];
    extern unsigned long long g_rf_early_no_meta[];
    extern unsigned long long g_rf_early_no_room[];
    extern unsigned long long g_rf_early_want_zero[];
    fprintf(stderr, "\n[Refill Early Returns - Diagnostic]\n");
    fprintf(stderr, "class, no_ss, no_meta, no_room, want_zero\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_rf_early_no_ss[i],
                (unsigned long long)g_rf_early_no_meta[i],
                (unsigned long long)g_rf_early_no_room[i],
                (unsigned long long)g_rf_early_want_zero[i]);
    }

    // Slab-ring counters
    extern unsigned long long g_slab_publish_dbg[];
    extern unsigned long long g_slab_adopt_dbg[];
    extern unsigned long long g_slab_requeue_dbg[];
    extern unsigned long long g_slab_miss_dbg[];
    fprintf(stderr, "\n[Slab Adopt/Publish Counters]\n");
    fprintf(stderr, "class, slab_publish, slab_adopt, slab_requeue, slab_miss\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_slab_publish_dbg[i],
                (unsigned long long)g_slab_adopt_dbg[i],
                (unsigned long long)g_slab_requeue_dbg[i],
                (unsigned long long)g_slab_miss_dbg[i]);
    }

    // Publish-side counters
    extern unsigned long long g_pub_bench_hits[];
    extern unsigned long long g_pub_hot_hits[];
    extern unsigned long long g_pub_mail_hits[];
    fprintf(stderr, "\n[Publish Hits]\n");
    fprintf(stderr, "class, pub_mail, pub_bench, pub_hot\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu\n", i,
                (unsigned long long)g_pub_mail_hits[i],
                (unsigned long long)g_pub_bench_hits[i],
                (unsigned long long)g_pub_hot_hits[i]);
    }

    // Front Gate Breakdown (SFC/SLL/Quick/Mag)
    extern unsigned long long g_front_sfc_hit[];
    extern unsigned long long g_front_sll_hit[];
    extern unsigned long long g_front_quick_hit[];
    extern unsigned long long g_front_mag_hit[];
    fprintf(stderr, "\n[Front Gate Breakdown]\n");
    fprintf(stderr, "class, sfc_hit, sll_hit, quick_hit, mag_hit\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_front_sfc_hit[i],
                (unsigned long long)g_front_sll_hit[i],
                (unsigned long long)g_front_quick_hit[i],
                (unsigned long long)g_front_mag_hit[i]);
    }

    // Free Triggers (first-free / remote transition)
    extern unsigned long long g_first_free_transitions[];
    extern unsigned long long g_remote_free_transitions[];
    fprintf(stderr, "\n[Free Triggers]\n");
    fprintf(stderr, "class, first_free, remote_transition\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu\n", i,
                (unsigned long long)g_first_free_transitions[i],
                (unsigned long long)g_remote_free_transitions[i]);
    }

    // Adopt/Registry Gate
    extern unsigned long long g_adopt_gate_calls[];
    extern unsigned long long g_adopt_gate_success[];
    extern unsigned long long g_reg_scan_attempts[];
    extern unsigned long long g_reg_scan_hits[];
    fprintf(stderr, "\n[Adopt/Registry Gate]\n");
    fprintf(stderr, "class, adopt_calls, adopt_success, reg_scans, reg_hits\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_adopt_gate_calls[i],
                (unsigned long long)g_adopt_gate_success[i],
                (unsigned long long)g_reg_scan_attempts[i],
                (unsigned long long)g_reg_scan_hits[i]);
    }

    // SuperSlab Registry (per-class sizes)
    extern int g_super_reg_class_size[];
    fprintf(stderr, "\n[SuperSlab Registry]\n");
    fprintf(stderr, "class, reg_size\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%d\n", i, g_super_reg_class_size[i]);
    }

    // Fast cache push / spare / lookup counters
    extern unsigned long long g_fast_push_hits[];
    extern unsigned long long g_fast_push_full[];
    extern unsigned long long g_fast_push_disabled[];
    extern unsigned long long g_fast_push_zero_cap[];
    extern unsigned long long g_fast_push_gate_disabled[];
    extern unsigned long long g_fast_push_gate_zero_cap[];
    extern unsigned long long g_fast_spare_attempts[];
    extern unsigned long long g_fast_spare_disabled[];
    extern unsigned long long g_fast_spare_empty[];
    extern unsigned long long g_fast_spare_lookup_fail[];
    extern unsigned long long g_fast_spare_bad_index[];
    extern unsigned long long g_fast_lookup_ss[];
    extern unsigned long long g_fast_lookup_slab[];
    extern unsigned long long g_fast_lookup_none;
    fprintf(stderr, "\n[Fast Cache Debug]\n");
    fprintf(stderr, "class, push_hits, push_full, push_disabled, push_zero_cap, gate_disabled, gate_zero_cap, spare_attempts, spare_disabled, spare_empty, spare_lookup_fail, spare_bad_index, lookup_ss, lookup_slab\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_fast_push_hits[i],
                (unsigned long long)g_fast_push_full[i],
                (unsigned long long)g_fast_push_disabled[i],
                (unsigned long long)g_fast_push_zero_cap[i],
                (unsigned long long)g_fast_push_gate_disabled[i],
                (unsigned long long)g_fast_push_gate_zero_cap[i],
                (unsigned long long)g_fast_spare_attempts[i],
                (unsigned long long)g_fast_spare_disabled[i],
                (unsigned long long)g_fast_spare_empty[i],
                (unsigned long long)g_fast_spare_lookup_fail[i],
                (unsigned long long)g_fast_spare_bad_index[i],
                (unsigned long long)g_fast_lookup_ss[i],
                (unsigned long long)g_fast_lookup_slab[i]);
    }
    fprintf(stderr, "lookup_none,%llu\n", (unsigned long long)g_fast_lookup_none);

    // SuperSlab cache statistics
    extern uint64_t g_ss_cache_hits[];
    extern uint64_t g_ss_cache_misses[];
    extern uint64_t g_ss_cache_puts[];
    extern uint64_t g_ss_cache_drops[];
    extern uint64_t g_ss_cache_precharged[];
    extern uint64_t g_superslabs_reused;
    extern uint64_t g_superslabs_cached;
    fprintf(stderr, "\n[SS Cache Stats]\n");
    fprintf(stderr, "class, cache_hits, cache_misses, cache_puts, cache_drops, precharged\n");
    // NOTE(review): the bound is a literal 8 rather than TINY_NUM_CLASSES;
    // presumably the g_ss_cache_* arrays have 8 entries -- confirm against
    // their definitions before changing it.
    for (int i = 0; i < 8; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_ss_cache_hits[i],
                (unsigned long long)g_ss_cache_misses[i],
                (unsigned long long)g_ss_cache_puts[i],
                (unsigned long long)g_ss_cache_drops[i],
                (unsigned long long)g_ss_cache_precharged[i]);
    }
    fprintf(stderr, "cache_reused=%llu cache_cached=%llu\n",
            (unsigned long long)g_superslabs_reused,
            (unsigned long long)g_superslabs_cached);

    // Free pipeline
    extern unsigned long long g_free_via_ss_local[];
    extern unsigned long long g_free_via_ss_remote[];
    extern unsigned long long g_free_via_tls_sll[];
    extern unsigned long long g_free_via_mag[];
    extern unsigned long long g_free_via_fast_tls[];
    extern unsigned long long g_free_via_fastcache[];
    extern unsigned long long g_fast_spare_flush[];
    fprintf(stderr, "\n[Free Pipeline]\n");
    fprintf(stderr, "class, ss_local, ss_remote, fast_tls, fast_cache, tls_sll, magazine, fast_spare_flush\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_free_via_ss_local[i],
                (unsigned long long)g_free_via_ss_remote[i],
                (unsigned long long)g_free_via_fast_tls[i],
                (unsigned long long)g_free_via_fastcache[i],
                (unsigned long long)g_free_via_tls_sll[i],
                (unsigned long long)g_free_via_mag[i],
                (unsigned long long)g_fast_spare_flush[i]);
    }

    // Publish pipeline
    extern unsigned long long g_pub_notify_calls[];
    extern unsigned long long g_pub_same_empty[];
    extern unsigned long long g_remote_transitions[];
    extern unsigned long long g_mailbox_register_calls[];
    extern unsigned long long g_mailbox_slow_discoveries[];
    fprintf(stderr, "\n[Publish Pipeline]\n");
    fprintf(stderr, "class, notify_calls, same_empty_pubs, remote_transitions, mailbox_reg_calls, mailbox_slow_disc\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_pub_notify_calls[i],
                (unsigned long long)g_pub_same_empty[i],
                (unsigned long long)g_remote_transitions[i],
                (unsigned long long)g_mailbox_register_calls[i],
                (unsigned long long)g_mailbox_slow_discoveries[i]);
    }

    // Refill timing (ns)
    extern unsigned long long g_rf_time_total_ns[];
    extern unsigned long long g_rf_time_hot_ns[];
    extern unsigned long long g_rf_time_bench_ns[];
    extern unsigned long long g_rf_time_mail_ns[];
    extern unsigned long long g_rf_time_slab_ns[];
    extern unsigned long long g_rf_time_ss_ns[];
    extern unsigned long long g_rf_time_reg_ns[];
    extern unsigned long long g_rf_time_mmap_ns[];
    fprintf(stderr, "\n[Refill Time (ns)]\n");
    fprintf(stderr, "class, total, hot, bench, mail, slab, ss, reg, mmap\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", i,
                (unsigned long long)g_rf_time_total_ns[i],
                (unsigned long long)g_rf_time_hot_ns[i],
                (unsigned long long)g_rf_time_bench_ns[i],
                (unsigned long long)g_rf_time_mail_ns[i],
                (unsigned long long)g_rf_time_slab_ns[i],
                (unsigned long long)g_rf_time_ss_ns[i],
                (unsigned long long)g_rf_time_reg_ns[i],
                (unsigned long long)g_rf_time_mmap_ns[i]);
    }
    (void)on;
#endif
}
// Always-available dump of the refill-stage counters and the related
// pipeline statistics. Everything is written to stderr as comma-separated
// tables: a bracketed section title, a column-header line, then one row per
// class index.
//
// The dump is gated on the environment: nothing is printed unless
// HAKMEM_TINY_REFILL_DUMP or HAKMEM_TINY_COUNTERS_DUMP is set to a value that
// atoi() parses as non-zero.
//
// Sections, in output order:
//   [Refill Stage Counters], [Refill Early Returns - Diagnostic],
//   [Publish Hits], [SS Cache Stats], [Free Pipeline], [Publish Pipeline],
//   [Fast Cache Debug], [Refill Time (ns)]
//
// The counters are defined in other translation units and are pulled in via
// block-scope extern declarations next to the section that prints them.
static void hak_tiny_refill_counters_dump(void) {
    // Resolve the stats flags first; this function does not consult the
    // resulting flag itself.
    hak_tiny_stats_init_flags();

    // Either variable enables the dump. The second one is only parsed when
    // the first did not already enable it.
    const char* refill_env = getenv("HAKMEM_TINY_REFILL_DUMP");
    const char* counters_env = getenv("HAKMEM_TINY_COUNTERS_DUMP");
    int dump_enabled = (refill_env != NULL && atoi(refill_env) != 0);
    if (!dump_enabled) {
        dump_enabled = (counters_env != NULL && atoi(counters_env) != 0);
    }
    if (!dump_enabled) {
        return;
    }

    // ---- Refill stage counters ----
    extern unsigned long long g_rf_total_calls[];
    extern unsigned long long g_rf_hit_mail[];
    extern unsigned long long g_rf_hit_bench[];
    extern unsigned long long g_rf_hit_hot[];
    extern unsigned long long g_rf_hit_slab[];
    extern unsigned long long g_rf_hit_ss[];
    extern unsigned long long g_rf_hit_reg[];
    extern unsigned long long g_rf_mmap_calls[];
    fputs("\n[Refill Stage Counters]\n", stderr);
    fputs("class, total, mail, bench, hot, slab, ss, reg, mmap\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_rf_total_calls[cls],
                (unsigned long long)g_rf_hit_mail[cls],
                (unsigned long long)g_rf_hit_bench[cls],
                (unsigned long long)g_rf_hit_hot[cls],
                (unsigned long long)g_rf_hit_slab[cls],
                (unsigned long long)g_rf_hit_ss[cls],
                (unsigned long long)g_rf_hit_reg[cls],
                (unsigned long long)g_rf_mmap_calls[cls]);
    }

    // ---- Diagnostic: refill early-return counters ----
    extern unsigned long long g_rf_early_no_ss[];
    extern unsigned long long g_rf_early_no_meta[];
    extern unsigned long long g_rf_early_no_room[];
    extern unsigned long long g_rf_early_want_zero[];
    fputs("\n[Refill Early Returns - Diagnostic]\n", stderr);
    fputs("class, no_ss, no_meta, no_room, want_zero\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_rf_early_no_ss[cls],
                (unsigned long long)g_rf_early_no_meta[cls],
                (unsigned long long)g_rf_early_no_room[cls],
                (unsigned long long)g_rf_early_want_zero[cls]);
    }

    // ---- Publish-side hit counters ----
    extern unsigned long long g_pub_mail_hits[];
    extern unsigned long long g_pub_bench_hits[];
    extern unsigned long long g_pub_hot_hits[];
    fputs("\n[Publish Hits]\n", stderr);
    fputs("class, pub_mail, pub_bench, pub_hot\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_pub_mail_hits[cls],
                (unsigned long long)g_pub_bench_hits[cls],
                (unsigned long long)g_pub_hot_hits[cls]);
    }

    // ---- SuperSlab cache statistics ----
    extern uint64_t g_ss_cache_hits[];
    extern uint64_t g_ss_cache_misses[];
    extern uint64_t g_ss_cache_puts[];
    extern uint64_t g_ss_cache_drops[];
    extern uint64_t g_ss_cache_precharged[];
    extern uint64_t g_superslabs_reused;
    extern uint64_t g_superslabs_cached;
    fputs("\n[SS Cache Stats]\n", stderr);
    fputs("class, cache_hits, cache_misses, cache_puts, cache_drops, precharged\n", stderr);
    // NOTE(review): this table iterates a literal 8 rows while every other
    // table uses TINY_NUM_CLASSES -- confirm against the array definitions.
    for (int cls = 0; cls < 8; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_ss_cache_hits[cls],
                (unsigned long long)g_ss_cache_misses[cls],
                (unsigned long long)g_ss_cache_puts[cls],
                (unsigned long long)g_ss_cache_drops[cls],
                (unsigned long long)g_ss_cache_precharged[cls]);
    }
    // Two scalar totals follow the per-class rows.
    fprintf(stderr, "cache_reused=%llu cache_cached=%llu\n",
            (unsigned long long)g_superslabs_reused,
            (unsigned long long)g_superslabs_cached);

    // ---- Free pipeline ----
    extern unsigned long long g_free_via_ss_local[];
    extern unsigned long long g_free_via_ss_remote[];
    extern unsigned long long g_free_via_fast_tls[];
    extern unsigned long long g_free_via_fastcache[];
    extern unsigned long long g_free_via_tls_sll[];
    extern unsigned long long g_free_via_mag[];
    extern unsigned long long g_fast_spare_flush[];
    fputs("\n[Free Pipeline]\n", stderr);
    fputs("class, ss_local, ss_remote, fast_tls, fast_cache, tls_sll, magazine, fast_spare_flush\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_free_via_ss_local[cls],
                (unsigned long long)g_free_via_ss_remote[cls],
                (unsigned long long)g_free_via_fast_tls[cls],
                (unsigned long long)g_free_via_fastcache[cls],
                (unsigned long long)g_free_via_tls_sll[cls],
                (unsigned long long)g_free_via_mag[cls],
                (unsigned long long)g_fast_spare_flush[cls]);
    }

    // ---- Publish pipeline ----
    extern unsigned long long g_pub_notify_calls[];
    extern unsigned long long g_pub_same_empty[];
    extern unsigned long long g_remote_transitions[];
    extern unsigned long long g_mailbox_register_calls[];
    extern unsigned long long g_mailbox_slow_discoveries[];
    fputs("\n[Publish Pipeline]\n", stderr);
    fputs("class, notify_calls, same_empty_pubs, remote_transitions, mailbox_reg_calls, mailbox_slow_disc\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_pub_notify_calls[cls],
                (unsigned long long)g_pub_same_empty[cls],
                (unsigned long long)g_remote_transitions[cls],
                (unsigned long long)g_mailbox_register_calls[cls],
                (unsigned long long)g_mailbox_slow_discoveries[cls]);
    }

    // ---- Fast cache debug counters ----
    extern unsigned long long g_fast_push_hits[];
    extern unsigned long long g_fast_push_full[];
    extern unsigned long long g_fast_push_disabled[];
    extern unsigned long long g_fast_push_zero_cap[];
    extern unsigned long long g_fast_push_gate_disabled[];
    extern unsigned long long g_fast_push_gate_zero_cap[];
    extern unsigned long long g_fast_spare_attempts[];
    extern unsigned long long g_fast_spare_disabled[];
    extern unsigned long long g_fast_spare_empty[];
    extern unsigned long long g_fast_spare_lookup_fail[];
    extern unsigned long long g_fast_spare_bad_index[];
    extern unsigned long long g_fast_lookup_ss[];
    extern unsigned long long g_fast_lookup_slab[];
    extern unsigned long long g_fast_lookup_none;
    fputs("\n[Fast Cache Debug]\n", stderr);
    fputs("class, push_hits, push_full, push_disabled, push_zero_cap, gate_disabled, gate_zero_cap, spare_attempts, spare_disabled, spare_empty, spare_lookup_fail, spare_bad_index, lookup_ss, lookup_slab\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_fast_push_hits[cls],
                (unsigned long long)g_fast_push_full[cls],
                (unsigned long long)g_fast_push_disabled[cls],
                (unsigned long long)g_fast_push_zero_cap[cls],
                (unsigned long long)g_fast_push_gate_disabled[cls],
                (unsigned long long)g_fast_push_gate_zero_cap[cls],
                (unsigned long long)g_fast_spare_attempts[cls],
                (unsigned long long)g_fast_spare_disabled[cls],
                (unsigned long long)g_fast_spare_empty[cls],
                (unsigned long long)g_fast_spare_lookup_fail[cls],
                (unsigned long long)g_fast_spare_bad_index[cls],
                (unsigned long long)g_fast_lookup_ss[cls],
                (unsigned long long)g_fast_lookup_slab[cls]);
    }
    // g_fast_lookup_none is a single scalar, printed after the per-class rows.
    fprintf(stderr, "lookup_none,%llu\n", (unsigned long long)g_fast_lookup_none);

    // ---- Refill timing (ns) ----
    extern unsigned long long g_rf_time_total_ns[];
    extern unsigned long long g_rf_time_hot_ns[];
    extern unsigned long long g_rf_time_bench_ns[];
    extern unsigned long long g_rf_time_mail_ns[];
    extern unsigned long long g_rf_time_slab_ns[];
    extern unsigned long long g_rf_time_ss_ns[];
    extern unsigned long long g_rf_time_reg_ns[];
    extern unsigned long long g_rf_time_mmap_ns[];
    fputs("\n[Refill Time (ns)]\n", stderr);
    fputs("class, total, hot, bench, mail, slab, ss, reg, mmap\n", stderr);
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        fprintf(stderr, "%d,%llu,%llu,%llu,%llu,%llu,%llu,%llu,%llu\n", cls,
                (unsigned long long)g_rf_time_total_ns[cls],
                (unsigned long long)g_rf_time_hot_ns[cls],
                (unsigned long long)g_rf_time_bench_ns[cls],
                (unsigned long long)g_rf_time_mail_ns[cls],
                (unsigned long long)g_rf_time_slab_ns[cls],
                (unsigned long long)g_rf_time_ss_ns[cls],
                (unsigned long long)g_rf_time_reg_ns[cls],
                (unsigned long long)g_rf_time_mmap_ns[cls]);
    }
}
// Automatic dump hook, registered as a destructor so it runs when the
// program/library is torn down.
//
// After resolving the flags (HAKMEM_TINY_DUMP_ATEXIT_ONLY):
//   - atexit-only mode set   -> hak_tiny_dump_all_counters_now()
//                               (minimal refill dump + extended debug dump)
//   - atexit-only mode unset -> hak_tiny_refill_counters_dump() only
//
// Note: hak_tiny_refill_counters_dump() applies its own environment gate
// (HAKMEM_TINY_REFILL_DUMP / HAKMEM_TINY_COUNTERS_DUMP) on either path, so
// the refill tables are printed only when one of those is enabled.
__attribute__((destructor))
static void hak_tiny_stats_auto_dump(void) {
    hak_tiny_stats_init_flags();

    if (!g_dump_atexit_only) {
        // Default path: minimal refill counters only.
        hak_tiny_refill_counters_dump();
        return;
    }

    // atexit-only path: request both the minimal and the extended dump.
    hak_tiny_dump_all_counters_now();
}