Fix C7 warm/TLS Release path and unify debug instrumentation

This commit is contained in:
Moe Charm (CI)
2025-12-05 23:41:01 +09:00
parent 96c2988381
commit d17ec46628
29 changed files with 1314 additions and 123 deletions

View File

@ -0,0 +1,59 @@
// c7_meta_used_counter_box.h
// Box: C7 meta->used increment counters (Release/Debug共通)
#pragma once
#include <stdatomic.h>
#include <stdint.h>
typedef enum C7MetaUsedSource {
C7_META_USED_SRC_UNKNOWN = 0,
C7_META_USED_SRC_BACKEND = 1,
C7_META_USED_SRC_TLS = 2,
C7_META_USED_SRC_FRONT = 3,
} C7MetaUsedSource;
#ifdef C7_META_COUNTER_DEFINE
#define C7_META_COUNTER_EXTERN
#else
#define C7_META_COUNTER_EXTERN extern
#endif
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_total;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_backend;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_tls;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_front;
static inline void c7_meta_used_note(int class_idx, C7MetaUsedSource src) {
if (__builtin_expect(class_idx != 7, 1)) {
return;
}
atomic_fetch_add_explicit(&g_c7_meta_used_inc_total, 1, memory_order_relaxed);
switch (src) {
case C7_META_USED_SRC_BACKEND:
atomic_fetch_add_explicit(&g_c7_meta_used_inc_backend, 1, memory_order_relaxed);
break;
case C7_META_USED_SRC_TLS:
atomic_fetch_add_explicit(&g_c7_meta_used_inc_tls, 1, memory_order_relaxed);
break;
case C7_META_USED_SRC_FRONT:
atomic_fetch_add_explicit(&g_c7_meta_used_inc_front, 1, memory_order_relaxed);
break;
default:
break;
}
}
static inline uint64_t c7_meta_used_total(void) {
return atomic_load_explicit(&g_c7_meta_used_inc_total, memory_order_relaxed);
}
static inline uint64_t c7_meta_used_backend(void) {
return atomic_load_explicit(&g_c7_meta_used_inc_backend, memory_order_relaxed);
}
static inline uint64_t c7_meta_used_tls(void) {
return atomic_load_explicit(&g_c7_meta_used_inc_tls, memory_order_relaxed);
}
static inline uint64_t c7_meta_used_front(void) {
return atomic_load_explicit(&g_c7_meta_used_inc_front, memory_order_relaxed);
}
#undef C7_META_COUNTER_EXTERN

View File

@ -15,6 +15,7 @@
#include "tiny_header_box.h" // Header Box: Single Source of Truth for header operations
#include "../tiny_refill_opt.h" // TinyRefillChain, trc_linear_carve()
#include "../tiny_box_geometry.h" // tiny_stride_for_class(), tiny_slab_base_for_geometry()
#include "c7_meta_used_counter_box.h"
// External declarations
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
@ -191,6 +192,7 @@ uint32_t box_carve_and_push_with_freelist(int class_idx, uint32_t want) {
void* p = meta->freelist;
meta->freelist = tiny_next_read(class_idx, p);
meta->used++;
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
// CRITICAL FIX: Restore header BEFORE pushing to TLS SLL
// Freelist blocks may have stale data at offset 0

View File

@ -41,7 +41,7 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
core/box/../tiny_refill_opt.h core/box/../box/tls_sll_box.h \
core/box/../tiny_box_geometry.h
core/box/../tiny_box_geometry.h core/box/c7_meta_used_counter_box.h
core/box/../hakmem_tiny.h:
core/box/../hakmem_build_flags.h:
core/box/../hakmem_trace.h:
@ -116,3 +116,4 @@ core/box/tiny_header_box.h:
core/box/../tiny_refill_opt.h:
core/box/../box/tls_sll_box.h:
core/box/../tiny_box_geometry.h:
core/box/c7_meta_used_counter_box.h:

View File

@ -9,12 +9,15 @@
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdatomic.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_tiny_superslab.h"
#include "../superslab/superslab_inline.h"
#include "../tiny_box_geometry.h"
#include "../box/tiny_next_ptr_box.h"
#include "../box/pagefault_telemetry_box.h"
#include "c7_meta_used_counter_box.h"
// ============================================================================
// Slab Carving API (Inline for Hot Path)
@ -46,11 +49,31 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
// Find an available slab in this SuperSlab
int cap = ss_slabs_capacity(ss);
#if HAKMEM_BUILD_RELEASE
static _Atomic int rel_c7_meta_logged = 0;
TinySlabMeta* rel_c7_meta = NULL;
int rel_c7_meta_idx = -1;
#else
static __thread int dbg_c7_meta_logged = 0;
TinySlabMeta* dbg_c7_meta = NULL;
int dbg_c7_meta_idx = -1;
#endif
for (int slab_idx = 0; slab_idx < cap; slab_idx++) {
TinySlabMeta* meta = &ss->slabs[slab_idx];
// Check if this slab matches our class and has capacity
if (meta->class_idx != (uint8_t)class_idx) continue;
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7 && atomic_load_explicit(&rel_c7_meta_logged, memory_order_relaxed) == 0 && !rel_c7_meta) {
rel_c7_meta = meta;
rel_c7_meta_idx = slab_idx;
}
#else
if (class_idx == 7 && dbg_c7_meta_logged == 0 && !dbg_c7_meta) {
dbg_c7_meta = meta;
dbg_c7_meta_idx = slab_idx;
}
#endif
if (meta->used >= meta->capacity && !meta->freelist) continue;
// Carve blocks from this slab
@ -73,6 +96,7 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
meta->freelist = next_node;
meta->used++;
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
} else if (meta->carved < meta->capacity) {
// Linear carve
@ -84,6 +108,7 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
meta->carved++;
meta->used++;
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
} else {
break; // This slab exhausted
@ -99,6 +124,48 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
// If this slab had no freelist and no carved capacity, continue to next
}
#if !HAKMEM_BUILD_RELEASE
static __thread int dbg_c7_slab_carve_zero_logs = 0;
if (class_idx == 7 && dbg_c7_slab_carve_zero_logs < 10) {
fprintf(stderr, "[C7_SLAB_CARVE_ZERO] ss=%p no blocks carved\n", (void*)ss);
dbg_c7_slab_carve_zero_logs++;
}
#endif
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7 &&
atomic_load_explicit(&rel_c7_meta_logged, memory_order_relaxed) == 0 &&
rel_c7_meta) {
size_t bs = tiny_stride_for_class(class_idx);
fprintf(stderr,
"[REL_C7_CARVE_META] ss=%p slab=%d cls=%u used=%u cap=%u carved=%u freelist=%p stride=%zu slabs_cap=%d\n",
(void*)ss,
rel_c7_meta_idx,
(unsigned)rel_c7_meta->class_idx,
(unsigned)rel_c7_meta->used,
(unsigned)rel_c7_meta->capacity,
(unsigned)rel_c7_meta->carved,
rel_c7_meta->freelist,
bs,
cap);
atomic_store_explicit(&rel_c7_meta_logged, 1, memory_order_relaxed);
}
#else
if (class_idx == 7 && dbg_c7_meta_logged == 0 && dbg_c7_meta) {
size_t bs = tiny_stride_for_class(class_idx);
fprintf(stderr,
"[DBG_C7_CARVE_META] ss=%p slab=%d cls=%u used=%u cap=%u carved=%u freelist=%p stride=%zu slabs_cap=%d\n",
(void*)ss,
dbg_c7_meta_idx,
(unsigned)dbg_c7_meta->class_idx,
(unsigned)dbg_c7_meta->used,
(unsigned)dbg_c7_meta->capacity,
(unsigned)dbg_c7_meta->carved,
dbg_c7_meta->freelist,
bs,
cap);
dbg_c7_meta_logged = 1;
}
#endif
return 0; // No slab in this SuperSlab had available capacity
}

View File

@ -0,0 +1,26 @@
// ss_slab_reset_box.h
// Box: Reset TinySlabMeta for reuse (C7 diagnostics-friendly).
#pragma once
#include "ss_slab_meta_box.h"
#include "../superslab/superslab_inline.h"
#include <stdatomic.h>

// Re-initialize the metadata of slab `slab_idx` inside `ss` so the slot can
// be handed out as a fresh tiny slab of `class_idx`.
// A NULL superslab or an out-of-range index is a defensive no-op.
static inline void ss_slab_reset_meta_for_tiny(SuperSlab* ss,
                                               int slab_idx,
                                               int class_idx)
{
    if (ss == NULL || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }
    TinySlabMeta* slot = &ss->slabs[slab_idx];
    slot->used = 0;
    slot->carved = 0;
    slot->freelist = NULL;
    slot->class_idx = (uint8_t)class_idx;
    ss->class_map[slab_idx] = (uint8_t)class_idx;
    // Clear any remote-free state left by the previous owner so a reused
    // slab does not observe stale pending frees.
    atomic_store_explicit(&ss->remote_heads[slab_idx], 0, memory_order_relaxed);
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_relaxed);
}

View File

@ -13,6 +13,7 @@
#include "../hakmem_tiny_config.h"
#include "../box/tiny_page_box.h" // For tiny_page_box_on_new_slab()
#include <stdio.h>
#include <stdatomic.h>
// Forward declaration if not included
// CRITICAL FIX: type must match core/hakmem_tiny_config.h (const size_t, not uint16_t)
@ -64,9 +65,7 @@ static inline int ss_tls_bind_one(int class_idx,
// superslab_init_slab() only sets it if meta->class_idx==255.
// We must explicitly set it to the requested class to avoid C0/C7 confusion.
TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
uint8_t old_cls = meta->class_idx;
#endif
meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
if (class_idx == 7 && old_cls != class_idx) {
@ -75,6 +74,36 @@ static inline int ss_tls_bind_one(int class_idx,
}
#endif
#if HAKMEM_BUILD_RELEASE
static _Atomic int rel_c7_bind_logged = 0;
if (class_idx == 7 &&
atomic_load_explicit(&rel_c7_bind_logged, memory_order_relaxed) == 0) {
fprintf(stderr,
"[REL_C7_BIND] ss=%p slab=%d cls=%u cap=%u used=%u carved=%u\n",
(void*)ss,
slab_idx,
(unsigned)meta->class_idx,
(unsigned)meta->capacity,
(unsigned)meta->used,
(unsigned)meta->carved);
atomic_store_explicit(&rel_c7_bind_logged, 1, memory_order_relaxed);
}
#else
static __thread int dbg_c7_bind_logged = 0;
if (class_idx == 7 && dbg_c7_bind_logged == 0) {
fprintf(stderr,
"[DBG_C7_BIND] ss=%p slab=%d old_cls=%u new_cls=%u cap=%u used=%u carved=%u\n",
(void*)ss,
slab_idx,
(unsigned)old_cls,
(unsigned)meta->class_idx,
(unsigned)meta->capacity,
(unsigned)meta->used,
(unsigned)meta->carved);
dbg_c7_bind_logged = 1;
}
#endif
// Bind this slab to TLS for fast subsequent allocations.
// Inline implementation of tiny_tls_bind_slab() to avoid header dependencies.
// Original logic:
@ -109,4 +138,4 @@ static inline int ss_tls_bind_one(int class_idx,
return 1;
}
#endif // HAK_SS_TLS_BIND_BOX_H
#endif // HAK_SS_TLS_BIND_BOX_H

View File

@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
// Default: conservative profile (all classes TINY_FIRST).
// This keeps Tiny in the fast path but always allows Pool fallback.
@ -40,5 +41,16 @@ void tiny_route_init(void)
// - 全クラス TINY_FIRSTTiny を使うが必ず Pool fallbackあり
memset(g_tiny_route, ROUTE_TINY_FIRST, sizeof(g_tiny_route));
}
}
#if HAKMEM_BUILD_RELEASE
static int rel_logged = 0;
if (!rel_logged) {
const char* mode =
(g_tiny_route[7] == ROUTE_TINY_ONLY) ? "TINY_ONLY" :
(g_tiny_route[7] == ROUTE_TINY_FIRST) ? "TINY_FIRST" :
(g_tiny_route[7] == ROUTE_POOL_ONLY) ? "POOL_ONLY" : "UNKNOWN";
fprintf(stderr, "[REL_C7_ROUTE] profile=%s route=%s\n", profile, mode);
rel_logged = 1;
}
#endif
}

View File

@ -19,6 +19,7 @@
#define TINY_ROUTE_BOX_H
#include <stdint.h>
#include <stdio.h>
// Routing policy per Tiny class.
typedef enum {
@ -43,8 +44,21 @@ void tiny_route_init(void);
// Uses simple array lookup; class_idx is masked to [0,7] defensively.
static inline TinyRoutePolicy tiny_route_get(int class_idx)
{
return (TinyRoutePolicy)g_tiny_route[class_idx & 7];
TinyRoutePolicy p = (TinyRoutePolicy)g_tiny_route[class_idx & 7];
#if HAKMEM_BUILD_RELEASE
if ((class_idx & 7) == 7) {
static int rel_route_logged = 0;
if (!rel_route_logged) {
const char* mode =
(p == ROUTE_TINY_ONLY) ? "TINY_ONLY" :
(p == ROUTE_TINY_FIRST) ? "TINY_FIRST" :
(p == ROUTE_POOL_ONLY) ? "POOL_ONLY" : "UNKNOWN";
fprintf(stderr, "[REL_C7_ROUTE] via tiny_route_get route=%s\n", mode);
rel_route_logged = 1;
}
}
#endif
return p;
}
#endif // TINY_ROUTE_BOX_H

View File

@ -0,0 +1,102 @@
// tiny_tls_carve_one_block_box.h
// Box: Shared TLS carve helper (linear or freelist) for Tiny classes.
#pragma once
#include "../tiny_tls.h"
#include "../tiny_box_geometry.h"
#include "../tiny_debug_api.h" // tiny_refill_failfast_level(), tiny_failfast_abort_ptr()
#include "c7_meta_used_counter_box.h" // C7 meta->used telemetry (shared by Release/Debug)
#include "tiny_next_ptr_box.h"
#include "../superslab/superslab_inline.h"
#include <stdatomic.h>
#include <signal.h>
#if !HAKMEM_BUILD_RELEASE
// Debug-only safe-free knobs (defined elsewhere). When g_tiny_safe_free is
// set, the freelist head is range/alignment-checked before being handed out;
// strict mode additionally raises SIGUSR2 on a bad head.
extern int g_tiny_safe_free;
extern int g_tiny_safe_free_strict;
#endif
// Which sub-path produced the block (reported in TinyTLSCarveOneResult.path).
enum {
TINY_TLS_CARVE_PATH_NONE = 0,
TINY_TLS_CARVE_PATH_LINEAR = 1,
TINY_TLS_CARVE_PATH_FREELIST = 2,
};
// Result of one carve attempt: block == NULL means no block was produced.
typedef struct TinyTLSCarveOneResult {
void* block;
int path;
} TinyTLSCarveOneResult;
// Carve one block from the current TLS slab.
// Returns .block == NULL on failure. path describes which sub-path was taken.
//
// Preconditions checked here (each failure returns the NONE result):
//   - tls, tls->meta, tls->ss, tls->slab_base are all non-NULL,
//   - the TLS-bound slab actually belongs to class_idx,
//   - tls->slab_idx is within the SuperSlab's slab capacity.
// On success, meta->used is incremented, the C7 telemetry counter is bumped,
// and the SuperSlab active count is raised by one.
static inline TinyTLSCarveOneResult
tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
{
TinyTLSCarveOneResult res = {.block = NULL, .path = TINY_TLS_CARVE_PATH_NONE};
if (!tls) return res;
TinySlabMeta* meta = tls->meta;
if (!meta || !tls->ss || tls->slab_base == NULL) return res;
if (meta->class_idx != (uint8_t)class_idx) return res;
if (tls->slab_idx < 0 || tls->slab_idx >= ss_slabs_capacity(tls->ss)) return res;
// Freelist pop
if (meta->freelist) {
#if !HAKMEM_BUILD_RELEASE
// Validate the freelist head lies inside this slab and is stride-aligned
// before trusting it (guards against corrupted/stale next pointers).
if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = tiny_stride_for_class(meta->class_idx);
uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
uintptr_t delta = (uintptr_t)meta->freelist - (uintptr_t)base;
int align_ok = ((delta % blk) == 0);
int range_ok = (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
// Strict mode: signal for a debugger/handler; either way refuse the block.
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return res; }
return res;
}
}
#endif
void* block = meta->freelist;
meta->freelist = tiny_next_read(class_idx, block);
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_FREELIST;
return res;
}
// Linear carve
// NOTE(review): this path indexes the next block by meta->used, whereas
// slab_carve_from_ss indexes its linear carve by meta->carved (and bumps
// both). That is only equivalent if "freelist empty => used == carved"
// holds (i.e. freelist length always equals carved - used). Confirm that
// invariant; otherwise this can hand out an already-carved block.
// Also note meta->carved is NOT advanced here -- verify intentional.
if (meta->used < meta->capacity) {
size_t block_size = tiny_stride_for_class(meta->class_idx);
void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
#if !HAKMEM_BUILD_RELEASE
// Fail-fast level >= 2: verify the computed block is inside the SuperSlab,
// stride-aligned relative to the slab base, and the slab index is sane.
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
uintptr_t base_ss = (uintptr_t)tls->ss;
size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
uintptr_t p = (uintptr_t)block;
int in_range = (p >= base_ss) && (p < base_ss + ss_size);
int aligned = ((p - (uintptr_t)tls->slab_base) % block_size) == 0;
int idx_ok = (tls->slab_idx >= 0) &&
(tls->slab_idx < ss_slabs_capacity(tls->ss));
if (!in_range || !aligned || !idx_ok || meta->used + 1 > meta->capacity) {
tiny_failfast_abort_ptr("tls_carve_align",
tls->ss,
tls->slab_idx,
block,
"tiny_tls_carve_one_block");
}
}
#endif
meta->used++;
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
ss_active_add(tls->ss, 1);
res.block = block;
res.path = TINY_TLS_CARVE_PATH_LINEAR;
return res;
}
// Slab exhausted: no freelist entry and no linear capacity left.
return res;
}

View File

@ -0,0 +1,121 @@
// warm_pool_dbg_box.h
// Box: Debug-only counters for C7 Warm Pool instrumentation.
//
// All nine counters follow the same pattern (atomic storage + relaxed bump +
// relaxed read), so they are generated from a single X-macro table; this
// keeps the storage, debug functions, and Release stubs in sync by
// construction. Interface is identical to the hand-written version.
#pragma once
#include <stdatomic.h>
#include <stdint.h>

// X(storage_global, bump_fn, read_fn)
#define WARM_POOL_DBG_C7_TABLE(X) \
    X(g_dbg_c7_warm_pop_attempts,     warm_pool_dbg_c7_attempt,        warm_pool_dbg_c7_attempts) \
    X(g_dbg_c7_warm_pop_hits,         warm_pool_dbg_c7_hit,            warm_pool_dbg_c7_hits) \
    X(g_dbg_c7_warm_pop_carve,        warm_pool_dbg_c7_carve,          warm_pool_dbg_c7_carves) \
    X(g_dbg_c7_tls_carve_attempts,    warm_pool_dbg_c7_tls_attempt,    warm_pool_dbg_c7_tls_attempts) \
    X(g_dbg_c7_tls_carve_success,     warm_pool_dbg_c7_tls_success,    warm_pool_dbg_c7_tls_successes) \
    X(g_dbg_c7_tls_carve_fail,        warm_pool_dbg_c7_tls_fail,       warm_pool_dbg_c7_tls_failures) \
    X(g_dbg_c7_uc_miss_warm_refill,   warm_pool_dbg_c7_uc_miss_warm,   warm_pool_dbg_c7_uc_miss_warm_refills) \
    X(g_dbg_c7_uc_miss_tls_refill,    warm_pool_dbg_c7_uc_miss_tls,    warm_pool_dbg_c7_uc_miss_tls_refills) \
    X(g_dbg_c7_uc_miss_shared_refill, warm_pool_dbg_c7_uc_miss_shared, warm_pool_dbg_c7_uc_miss_shared_refills)

#if !HAKMEM_BUILD_RELEASE
// Storage: the one TU that defines WARM_POOL_DBG_DEFINE owns the globals.
#ifdef WARM_POOL_DBG_DEFINE
#define WARM_POOL_DBG_C7_STORAGE(g, bump, read) _Atomic uint64_t g = 0;
#else
#define WARM_POOL_DBG_C7_STORAGE(g, bump, read) extern _Atomic uint64_t g;
#endif
WARM_POOL_DBG_C7_TABLE(WARM_POOL_DBG_C7_STORAGE)
#undef WARM_POOL_DBG_C7_STORAGE

// Bump (relaxed increment) and read (relaxed snapshot) helpers.
#define WARM_POOL_DBG_C7_FUNCS(g, bump, read) \
    static inline void bump(void) { \
        atomic_fetch_add_explicit(&g, 1, memory_order_relaxed); \
    } \
    static inline uint64_t read(void) { \
        return atomic_load_explicit(&g, memory_order_relaxed); \
    }
WARM_POOL_DBG_C7_TABLE(WARM_POOL_DBG_C7_FUNCS)
#undef WARM_POOL_DBG_C7_FUNCS
#else
// Release build: no storage, bumps are no-ops and reads report zero.
#define WARM_POOL_DBG_C7_STUBS(g, bump, read) \
    static inline void bump(void) { } \
    static inline uint64_t read(void) { return 0; }
WARM_POOL_DBG_C7_TABLE(WARM_POOL_DBG_C7_STUBS)
#undef WARM_POOL_DBG_C7_STUBS
#endif
#undef WARM_POOL_DBG_C7_TABLE

View File

@ -7,11 +7,51 @@
#define HAK_WARM_POOL_PREFILL_BOX_H
#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>
#include "../hakmem_tiny_config.h"
#include "../hakmem_tiny_superslab.h"
#include "../tiny_tls.h"
#include "../front/tiny_warm_pool.h"
#include "../box/warm_pool_stats_box.h"
#include "../box/warm_pool_rel_counters_box.h"
static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) {
if (!tls || !tls->ss) return;
#if HAKMEM_BUILD_RELEASE
static _Atomic uint32_t rel_logs = 0;
uint32_t n = atomic_fetch_add_explicit(&rel_logs, 1, memory_order_relaxed);
if (n < 4) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
fprintf(stderr,
"[REL_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p\n",
tag,
(void*)tls->ss,
(unsigned)tls->slab_idx,
(unsigned)meta->class_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(unsigned)meta->carved,
meta->freelist);
}
#else
static _Atomic uint32_t dbg_logs = 0;
uint32_t n = atomic_fetch_add_explicit(&dbg_logs, 1, memory_order_relaxed);
if (n < 4) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
fprintf(stderr,
"[DBG_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p\n",
tag,
(void*)tls->ss,
(unsigned)tls->slab_idx,
(unsigned)meta->class_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(unsigned)meta->carved,
meta->freelist);
}
#endif
}
// Forward declarations
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
@ -45,9 +85,17 @@ extern SuperSlab* superslab_refill(int class_idx);
// Performance: Only triggered when pool is empty, cold path cost
//
static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls) {
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_rel_c7_prefill_call();
}
#endif
int budget = (tiny_warm_pool_count(class_idx) == 0) ? WARM_POOL_PREFILL_BUDGET : 1;
while (budget > 0) {
if (class_idx == 7) {
warm_prefill_log_c7_meta("PREFILL_META", tls);
}
if (!tls->ss) {
// Need to load a new SuperSlab
if (!superslab_refill(class_idx)) {
@ -61,16 +109,75 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls) {
break;
}
// C7 safety: prefer only pristine slabs (used=0 carved=0 freelist=NULL)
if (class_idx == 7) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
if (meta->class_idx == 7 &&
(meta->used > 0 || meta->carved > 0 || meta->freelist != NULL)) {
#if HAKMEM_BUILD_RELEASE
static _Atomic int rel_c7_skip_logged = 0;
if (atomic_load_explicit(&rel_c7_skip_logged, memory_order_relaxed) == 0) {
fprintf(stderr,
"[REL_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
(void*)tls->ss,
(unsigned)tls->slab_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(unsigned)meta->carved,
meta->freelist);
atomic_store_explicit(&rel_c7_skip_logged, 1, memory_order_relaxed);
}
#else
static __thread int dbg_c7_skip_logged = 0;
if (dbg_c7_skip_logged < 4) {
fprintf(stderr,
"[DBG_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
(void*)tls->ss,
(unsigned)tls->slab_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(unsigned)meta->carved,
meta->freelist);
dbg_c7_skip_logged++;
}
#endif
tls->ss = NULL; // Drop exhausted slab and try another
budget--;
continue;
}
}
if (budget > 1) {
// Prefill mode: push to pool and load another
tiny_warm_pool_push(class_idx, tls->ss);
warm_pool_record_prefilled(class_idx);
tls->ss = NULL; // Force next iteration to refill
budget--;
} else {
// Final slab: keep in TLS for immediate carving
budget = 0;
#if HAKMEM_BUILD_RELEASE
if (class_idx == 7) {
warm_pool_rel_c7_prefill_slab();
}
#else
if (class_idx == 7) {
static __thread int dbg_c7_prefill_logs = 0;
if (dbg_c7_prefill_logs < 8) {
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
fprintf(stderr,
"[DBG_C7_PREFILL] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
(void*)tls->ss,
(unsigned)tls->slab_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(unsigned)meta->carved,
meta->freelist);
dbg_c7_prefill_logs++;
}
}
#endif
tls->ss = NULL; // Force next iteration to refill
budget--;
} else {
// Final slab: keep in TLS for immediate carving
budget = 0;
}
}
return 0; // Success

View File

@ -0,0 +1,64 @@
// warm_pool_rel_counters_box.h
// Box: Lightweight Release-side counters for C7 Warm/TLS instrumentation.
//
// Five counters share one pattern (atomic storage + relaxed bump + relaxed
// read), so they are generated from a single X-macro table; the Release
// implementation and the non-Release stubs stay in sync by construction.
// Interface is identical to the hand-written version.
#pragma once
#include <stdatomic.h>
#include <stdint.h>

// X(storage_global, bump_fn, read_fn)
#define WARM_POOL_REL_C7_TABLE(X) \
    X(g_rel_c7_carve_attempts,     warm_pool_rel_c7_carve_attempt, warm_pool_rel_c7_carve_attempts) \
    X(g_rel_c7_carve_success,      warm_pool_rel_c7_carve_success, warm_pool_rel_c7_carve_successes) \
    X(g_rel_c7_carve_zero,         warm_pool_rel_c7_carve_zero,    warm_pool_rel_c7_carve_zeroes) \
    X(g_rel_c7_warm_prefill_calls, warm_pool_rel_c7_prefill_call,  warm_pool_rel_c7_prefill_calls) \
    X(g_rel_c7_warm_prefill_slabs, warm_pool_rel_c7_prefill_slab,  warm_pool_rel_c7_prefill_slabs)

#if HAKMEM_BUILD_RELEASE
// Storage: the one TU that defines WARM_POOL_REL_DEFINE owns the globals.
#ifdef WARM_POOL_REL_DEFINE
#define WARM_POOL_REL_C7_STORAGE(g, bump, read) _Atomic uint64_t g = 0;
#else
#define WARM_POOL_REL_C7_STORAGE(g, bump, read) extern _Atomic uint64_t g;
#endif
WARM_POOL_REL_C7_TABLE(WARM_POOL_REL_C7_STORAGE)
#undef WARM_POOL_REL_C7_STORAGE

// Bump (relaxed increment) and read (relaxed snapshot) helpers.
#define WARM_POOL_REL_C7_FUNCS(g, bump, read) \
    static inline void bump(void) { \
        atomic_fetch_add_explicit(&g, 1, memory_order_relaxed); \
    } \
    static inline uint64_t read(void) { \
        return atomic_load_explicit(&g, memory_order_relaxed); \
    }
WARM_POOL_REL_C7_TABLE(WARM_POOL_REL_C7_FUNCS)
#undef WARM_POOL_REL_C7_FUNCS
#else
// Non-Release build: no storage, bumps are no-ops and reads report zero.
#define WARM_POOL_REL_C7_STUBS(g, bump, read) \
    static inline void bump(void) { } \
    static inline uint64_t read(void) { return 0; }
WARM_POOL_REL_C7_TABLE(WARM_POOL_REL_C7_STUBS)
#undef WARM_POOL_REL_C7_STUBS
#endif
#undef WARM_POOL_REL_C7_TABLE

View File

@ -0,0 +1,57 @@
// warm_tls_bind_logger_box.h
// Box: Warm TLS Bind experiment logging with simple throttling.
//
// Debug builds emit at most HAKMEM_WARM_TLS_BIND_LOG_MAX messages
// (default 1); Release builds compile all logging to no-ops.
#pragma once
#include "../hakmem_tiny_superslab.h"
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#if !HAKMEM_BUILD_RELEASE
// NOTE(review): these are `static` in a header, so every translation unit
// gets its own limit/count pair -- the throttle is per-TU, not process-wide.
// TODO confirm that is intended.
static _Atomic int g_warm_tls_bind_log_limit = -1; // -1 = not yet parsed
static _Atomic int g_warm_tls_bind_log_count = 0;

// Lazily parse HAKMEM_WARM_TLS_BIND_LOG_MAX (default 1 when unset/empty).
// Negative parsed values are clamped to 0: storing -1 would alias the
// "unparsed" sentinel and force a getenv() on every subsequent call.
static inline int warm_tls_bind_log_limit(void) {
    int limit = atomic_load_explicit(&g_warm_tls_bind_log_limit, memory_order_relaxed);
    if (__builtin_expect(limit == -1, 0)) {
        const char* e = getenv("HAKMEM_WARM_TLS_BIND_LOG_MAX");
        int parsed = (e && *e) ? atoi(e) : 1;
        if (parsed < 0) parsed = 0;
        atomic_store_explicit(&g_warm_tls_bind_log_limit, parsed, memory_order_relaxed);
        limit = parsed;
    }
    return limit;
}

// Returns 1 if the caller may emit one more log line, 0 once throttled.
// FIX: the previous version did an unconditional fetch_add, so the counter
// kept growing after the limit was reached; once it wrapped past INT_MAX the
// negative value compared below the limit and re-enabled logging. Using a
// CAS only while below the limit bounds the counter and preserves the
// "at most `limit` logs" contract.
static inline int warm_tls_bind_log_acquire(void) {
    int limit = warm_tls_bind_log_limit();
    int cur = atomic_load_explicit(&g_warm_tls_bind_log_count, memory_order_relaxed);
    while (cur < limit) {
        if (atomic_compare_exchange_weak_explicit(&g_warm_tls_bind_log_count,
                                                  &cur, cur + 1,
                                                  memory_order_relaxed,
                                                  memory_order_relaxed)) {
            return 1;
        }
        // CAS failure reloaded `cur`; loop re-checks it against the limit.
    }
    return 0;
}

// Log a successful C7 warm-pool TLS bind.
static inline void warm_tls_bind_log_success(SuperSlab* ss, int slab_idx) {
    if (warm_tls_bind_log_acquire()) {
        fprintf(stderr, "[WARM_TLS_BIND] C7 bind success: ss=%p slab=%d\n",
                (void*)ss, slab_idx);
    }
}
// Log a successful TLS carve after a warm bind.
static inline void warm_tls_bind_log_tls_carve(SuperSlab* ss, int slab_idx, void* block) {
    if (warm_tls_bind_log_acquire()) {
        fprintf(stderr,
                "[WARM_TLS_BIND] C7 TLS carve success: ss=%p slab=%d block=%p\n",
                (void*)ss, slab_idx, block);
    }
}
// Log a failed TLS carve (caller falls back to the slow path).
static inline void warm_tls_bind_log_tls_fail(SuperSlab* ss, int slab_idx) {
    if (warm_tls_bind_log_acquire()) {
        fprintf(stderr,
                "[WARM_TLS_BIND] C7 TLS carve failed, fallback (ss=%p slab=%d)\n",
                (void*)ss, slab_idx);
    }
}
#else
// Release: logging compiled out entirely.
static inline void warm_tls_bind_log_success(SuperSlab* ss, int slab_idx) { (void)ss; (void)slab_idx; }
static inline void warm_tls_bind_log_tls_carve(SuperSlab* ss, int slab_idx, void* block) { (void)ss; (void)slab_idx; (void)block; }
static inline void warm_tls_bind_log_tls_fail(SuperSlab* ss, int slab_idx) { (void)ss; (void)slab_idx; }
#endif