Fix C7 warm/TLS Release path and unify debug instrumentation
This commit is contained in:
@ -12,10 +12,19 @@
|
||||
#include "../box/ss_slab_meta_box.h" // For ss_active_add() and slab metadata operations
|
||||
#include "../box/warm_pool_stats_box.h" // Box: Warm Pool Statistics Recording (inline)
|
||||
#include "../box/slab_carve_box.h" // Box: Slab Carving (inline O(slabs) scan)
|
||||
#define WARM_POOL_REL_DEFINE
|
||||
#include "../box/warm_pool_rel_counters_box.h" // Box: Release-side C7 counters
|
||||
#undef WARM_POOL_REL_DEFINE
|
||||
#include "../box/c7_meta_used_counter_box.h" // Box: C7 meta->used increment counters
|
||||
#include "../box/warm_pool_prefill_box.h" // Box: Warm Pool Prefill (secondary optimization)
|
||||
#include "../hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
|
||||
#include "../box/tiny_page_box.h" // Tiny-Plus Page Box (C5–C7 initial hook)
|
||||
#include "../box/ss_tls_bind_box.h" // Box: TLS Bind (SuperSlab -> TLS binding)
|
||||
#include "../box/tiny_tls_carve_one_block_box.h" // Box: TLS carve helper (shared)
|
||||
#include "../box/warm_tls_bind_logger_box.h" // Box: Warm TLS Bind logging (throttled)
|
||||
#define WARM_POOL_DBG_DEFINE
|
||||
#include "../box/warm_pool_dbg_box.h" // Box: Warm Pool C7 debug counters
|
||||
#undef WARM_POOL_DBG_DEFINE
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdatomic.h>
|
||||
@ -84,6 +93,12 @@ __thread uint64_t g_unified_cache_push[TINY_NUM_CLASSES] = {0};
|
||||
__thread uint64_t g_unified_cache_full[TINY_NUM_CLASSES] = {0};
|
||||
#endif
|
||||
|
||||
// Release-side lightweight telemetry (C7 Warm path only)
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
_Atomic uint64_t g_rel_c7_warm_pop = 0;
|
||||
_Atomic uint64_t g_rel_c7_warm_push = 0;
|
||||
#endif
|
||||
|
||||
// Warm Pool metrics (definition - declared in tiny_warm_pool.h as extern)
|
||||
// Note: These are kept outside !HAKMEM_BUILD_RELEASE for profiling in release builds
|
||||
__thread TinyWarmPoolStats g_warm_pool_stats[TINY_NUM_CLASSES] = {0};
|
||||
@ -98,46 +113,36 @@ _Atomic uint64_t g_dbg_warm_pop_attempts = 0;
|
||||
_Atomic uint64_t g_dbg_warm_pop_hits = 0;
|
||||
_Atomic uint64_t g_dbg_warm_pop_empty = 0;
|
||||
_Atomic uint64_t g_dbg_warm_pop_carve_zero = 0;
|
||||
#endif
|
||||
|
||||
// Debug-only: cached ENV for Warm TLS Bind (C7)
|
||||
static int g_warm_tls_bind_mode_c7 = -1;
|
||||
|
||||
// Warm TLS Bind (C7) mode selector
|
||||
// mode 0: Legacy warm path (debug only; not recommended for C7)
|
||||
// mode 1: Bind-only production path (the C7 default)
|
||||
// mode 2: Bind + TLS carve experimental path (Debug builds only)
|
||||
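// In Release builds the mode is meant to be pinned to 1 and ENV ignored. NOTE(review): the Release branch as shown still reads HAKMEM_WARM_TLS_BIND_C7 via getenv() — confirm.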
|
||||
static inline int warm_tls_bind_mode_c7(void) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
static int g_warm_tls_bind_mode_c7 = -1;
|
||||
if (__builtin_expect(g_warm_tls_bind_mode_c7 == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_WARM_TLS_BIND_C7");
|
||||
// 0/empty: disabled, 1: bind only, 2: bind + TLS carve one block
|
||||
g_warm_tls_bind_mode_c7 = (e && *e) ? atoi(e) : 0;
|
||||
int mode = (e && *e) ? atoi(e) : 1; // default = Bind-only
|
||||
if (mode < 0) mode = 0;
|
||||
if (mode > 2) mode = 2;
|
||||
g_warm_tls_bind_mode_c7 = mode;
|
||||
}
|
||||
return g_warm_tls_bind_mode_c7;
|
||||
}
|
||||
|
||||
static inline void* warm_tls_carve_one_block(int class_idx) {
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
|
||||
if (!meta || !tls->ss || tls->slab_base == NULL) return NULL;
|
||||
if (meta->class_idx != (uint8_t)class_idx) return NULL;
|
||||
if (tls->slab_idx < 0 || tls->slab_idx >= ss_slabs_capacity(tls->ss)) return NULL;
|
||||
|
||||
if (meta->freelist) {
|
||||
void* block = meta->freelist;
|
||||
meta->freelist = tiny_next_read(class_idx, block);
|
||||
meta->used++;
|
||||
ss_active_add(tls->ss, 1);
|
||||
return block;
|
||||
#else
|
||||
static int g_warm_tls_bind_mode_c7 = -1;
|
||||
if (__builtin_expect(g_warm_tls_bind_mode_c7 == -1, 0)) {
|
||||
const char* e = getenv("HAKMEM_WARM_TLS_BIND_C7");
|
||||
int mode = (e && *e) ? atoi(e) : 1; // default = Bind-only
|
||||
if (mode < 0) mode = 0;
|
||||
if (mode > 2) mode = 2;
|
||||
g_warm_tls_bind_mode_c7 = mode;
|
||||
}
|
||||
|
||||
if (meta->used < meta->capacity) {
|
||||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||||
void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
|
||||
meta->used++;
|
||||
ss_active_add(tls->ss, 1);
|
||||
return block;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
return g_warm_tls_bind_mode_c7;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Forward declaration for Warm Pool stats printer (defined later in this file)
|
||||
static inline void tiny_warm_pool_print_stats(void);
|
||||
@ -157,6 +162,15 @@ int unified_cache_enabled(void) {
|
||||
fprintf(stderr, "[Unified-INIT] unified_cache_enabled() = %d\n", g_enable);
|
||||
fflush(stderr);
|
||||
}
|
||||
#else
|
||||
if (g_enable) {
|
||||
static int printed = 0;
|
||||
if (!printed) {
|
||||
fprintf(stderr, "[Rel-Unified] unified_cache_enabled() = %d\n", g_enable);
|
||||
fflush(stderr);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return g_enable;
|
||||
@ -311,6 +325,32 @@ static inline void tiny_warm_pool_print_stats(void) {
|
||||
(unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_hits, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_empty, memory_order_relaxed),
|
||||
(unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_carve_zero, memory_order_relaxed));
|
||||
uint64_t c7_attempts = warm_pool_dbg_c7_attempts();
|
||||
uint64_t c7_hits = warm_pool_dbg_c7_hits();
|
||||
uint64_t c7_carve = warm_pool_dbg_c7_carves();
|
||||
uint64_t c7_tls_attempts = warm_pool_dbg_c7_tls_attempts();
|
||||
uint64_t c7_tls_success = warm_pool_dbg_c7_tls_successes();
|
||||
uint64_t c7_tls_fail = warm_pool_dbg_c7_tls_failures();
|
||||
uint64_t c7_uc_warm = warm_pool_dbg_c7_uc_miss_warm_refills();
|
||||
uint64_t c7_uc_tls = warm_pool_dbg_c7_uc_miss_tls_refills();
|
||||
uint64_t c7_uc_shared = warm_pool_dbg_c7_uc_miss_shared_refills();
|
||||
if (c7_attempts || c7_hits || c7_carve ||
|
||||
c7_tls_attempts || c7_tls_success || c7_tls_fail ||
|
||||
c7_uc_warm || c7_uc_tls || c7_uc_shared) {
|
||||
fprintf(stderr,
|
||||
" [DBG_C7] warm_pop_attempts=%llu warm_pop_hits=%llu warm_pop_carve=%llu "
|
||||
"tls_carve_attempts=%llu tls_carve_success=%llu tls_carve_fail=%llu "
|
||||
"uc_miss_warm=%llu uc_miss_tls=%llu uc_miss_shared=%llu\n",
|
||||
(unsigned long long)c7_attempts,
|
||||
(unsigned long long)c7_hits,
|
||||
(unsigned long long)c7_carve,
|
||||
(unsigned long long)c7_tls_attempts,
|
||||
(unsigned long long)c7_tls_success,
|
||||
(unsigned long long)c7_tls_fail,
|
||||
(unsigned long long)c7_uc_warm,
|
||||
(unsigned long long)c7_uc_tls,
|
||||
(unsigned long long)c7_uc_shared);
|
||||
}
|
||||
#endif
|
||||
fflush(stderr);
|
||||
}
|
||||
@ -515,6 +555,7 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
// - This guarantees that room <= max_batch <= 512 always holds, preventing an out[] overrun.
|
||||
void* out[512];
|
||||
int produced = 0;
|
||||
int tls_carved = 0; // Debug bookkeeping: track TLS carve experiment hits
|
||||
|
||||
// ========== PAGE BOX HOT PATH (Tiny-Plus layer): Try page box FIRST ==========
|
||||
// In the future, C7-specific page-level freelist management will be integrated here.
|
||||
@ -554,10 +595,21 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
// This is the critical optimization - avoid superslab_refill() registry scan
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
atomic_fetch_add_explicit(&g_dbg_warm_pop_attempts, 1, memory_order_relaxed);
|
||||
if (class_idx == 7) {
|
||||
warm_pool_dbg_c7_attempt();
|
||||
}
|
||||
#endif
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_warm_pop, 1, memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
|
||||
if (warm_ss) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_dbg_c7_hit();
|
||||
}
|
||||
// Debug-only: Warm TLS Bind experiment (C7 only)
|
||||
if (class_idx == 7) {
|
||||
int warm_mode = warm_tls_bind_mode_c7();
|
||||
@ -577,25 +629,22 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||||
uint32_t tid = (uint32_t)(uintptr_t)pthread_self();
|
||||
if (ss_tls_bind_one(class_idx, tls, warm_ss, slab_idx, tid)) {
|
||||
static int logged = 0;
|
||||
if (!logged) {
|
||||
fprintf(stderr, "[WARM_TLS_BIND] C7 bind success: ss=%p slab=%d\n",
|
||||
(void*)warm_ss, slab_idx);
|
||||
logged = 1;
|
||||
}
|
||||
warm_tls_bind_log_success(warm_ss, slab_idx);
|
||||
|
||||
// Mode 2: carve a single block via TLS fast path
|
||||
if (warm_mode == 2) {
|
||||
void* tls_block = warm_tls_carve_one_block(class_idx);
|
||||
if (tls_block) {
|
||||
fprintf(stderr,
|
||||
"[WARM_TLS_BIND] C7 TLS carve success: ss=%p slab=%d block=%p\n",
|
||||
(void*)warm_ss, slab_idx, tls_block);
|
||||
out[0] = tls_block;
|
||||
warm_pool_dbg_c7_tls_attempt();
|
||||
TinyTLSCarveOneResult tls_carve =
|
||||
tiny_tls_carve_one_block(tls, class_idx);
|
||||
if (tls_carve.block) {
|
||||
warm_tls_bind_log_tls_carve(warm_ss, slab_idx, tls_carve.block);
|
||||
warm_pool_dbg_c7_tls_success();
|
||||
out[0] = tls_carve.block;
|
||||
produced = 1;
|
||||
tls_carved = 1;
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"[WARM_TLS_BIND] C7 TLS carve failed, fallback\n");
|
||||
warm_tls_bind_log_tls_fail(warm_ss, slab_idx);
|
||||
warm_pool_dbg_c7_tls_fail();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -607,7 +656,21 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
#endif
|
||||
// HOT PATH: Warm pool hit, try to carve directly
|
||||
if (produced == 0) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_rel_c7_carve_attempt();
|
||||
}
|
||||
#endif
|
||||
produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
if (produced > 0) {
|
||||
warm_pool_rel_c7_carve_success();
|
||||
} else {
|
||||
warm_pool_rel_c7_carve_zero();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (produced > 0) {
|
||||
// Update active counter for carved blocks
|
||||
ss_active_add(warm_ss, (uint32_t)produced);
|
||||
@ -615,7 +678,22 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
}
|
||||
|
||||
if (produced > 0) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_dbg_c7_carve();
|
||||
if (tls_carved) {
|
||||
warm_pool_dbg_c7_uc_miss_tls();
|
||||
} else {
|
||||
warm_pool_dbg_c7_uc_miss_warm();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// Success! Return SuperSlab to warm pool for next use
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_warm_push, 1, memory_order_relaxed);
|
||||
}
|
||||
#endif
|
||||
tiny_warm_pool_push(class_idx, warm_ss);
|
||||
|
||||
// Track warm pool hit (always compiled, ENV-gated printing)
|
||||
@ -761,6 +839,9 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
|
||||
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_dbg_c7_uc_miss_shared();
|
||||
}
|
||||
g_unified_cache_miss[class_idx]++;
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user