Fix C7 warm/TLS Release path and unify debug instrumentation
This commit is contained in:
59
core/box/c7_meta_used_counter_box.h
Normal file
59
core/box/c7_meta_used_counter_box.h
Normal file
@ -0,0 +1,59 @@
|
||||
// c7_meta_used_counter_box.h
|
||||
// Box: C7 meta->used increment counters (shared by Release and Debug builds)
|
||||
#pragma once
|
||||
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Tag passed to c7_meta_used_note() naming the path that incremented
// meta->used. Enumerators rely on implicit sequential numbering from 0.
typedef enum C7MetaUsedSource {
    C7_META_USED_SRC_UNKNOWN = 0,  // counted in the total only
    C7_META_USED_SRC_BACKEND,      // 1
    C7_META_USED_SRC_TLS,          // 2
    C7_META_USED_SRC_FRONT         // 3
} C7MetaUsedSource;
|
||||
|
||||
// Linkage selector for the counters below. An includer that defines
// C7_META_COUNTER_DEFINE beforehand gets non-extern (defining) declarations;
// every other includer gets extern declarations only.
// NOTE(review): this header uses #pragma once, so the defining includer must
// see this header for the first time with the macro already set - confirm.
#if defined(C7_META_COUNTER_DEFINE)
#  define C7_META_COUNTER_EXTERN
#else
#  define C7_META_COUNTER_EXTERN extern
#endif

// Overall increment count, followed by one counter per C7MetaUsedSource
// that c7_meta_used_note() tracks individually.
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_total;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_backend;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_tls;
C7_META_COUNTER_EXTERN _Atomic uint64_t g_c7_meta_used_inc_front;
|
||||
|
||||
// Record one meta->used increment for class 7.
//
// class_idx: size class of the slab being updated; anything other than 7 is
//            ignored (hinted as the common case).
// src:       originating path; BACKEND/TLS/FRONT also bump their own counter,
//            any other value only contributes to the total.
//
// All updates use relaxed atomic adds.
static inline void c7_meta_used_note(int class_idx, C7MetaUsedSource src) {
    if (__builtin_expect(class_idx == 7, 0)) {
        _Atomic uint64_t* bucket;

        atomic_fetch_add_explicit(&g_c7_meta_used_inc_total, 1, memory_order_relaxed);

        if (src == C7_META_USED_SRC_BACKEND) {
            bucket = &g_c7_meta_used_inc_backend;
        } else if (src == C7_META_USED_SRC_TLS) {
            bucket = &g_c7_meta_used_inc_tls;
        } else if (src == C7_META_USED_SRC_FRONT) {
            bucket = &g_c7_meta_used_inc_front;
        } else {
            return;  // no per-source counter for this tag
        }

        atomic_fetch_add_explicit(bucket, 1, memory_order_relaxed);
    }
}
|
||||
|
||||
static inline uint64_t c7_meta_used_total(void) {
|
||||
return atomic_load_explicit(&g_c7_meta_used_inc_total, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t c7_meta_used_backend(void) {
|
||||
return atomic_load_explicit(&g_c7_meta_used_inc_backend, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t c7_meta_used_tls(void) {
|
||||
return atomic_load_explicit(&g_c7_meta_used_inc_tls, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t c7_meta_used_front(void) {
|
||||
return atomic_load_explicit(&g_c7_meta_used_inc_front, memory_order_relaxed);
|
||||
}
|
||||
|
||||
#undef C7_META_COUNTER_EXTERN
|
||||
@ -15,6 +15,7 @@
|
||||
#include "tiny_header_box.h" // Header Box: Single Source of Truth for header operations
|
||||
#include "../tiny_refill_opt.h" // TinyRefillChain, trc_linear_carve()
|
||||
#include "../tiny_box_geometry.h" // tiny_stride_for_class(), tiny_slab_base_for_geometry()
|
||||
#include "c7_meta_used_counter_box.h"
|
||||
|
||||
// External declarations
|
||||
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
|
||||
@ -191,6 +192,7 @@ uint32_t box_carve_and_push_with_freelist(int class_idx, uint32_t want) {
|
||||
void* p = meta->freelist;
|
||||
meta->freelist = tiny_next_read(class_idx, p);
|
||||
meta->used++;
|
||||
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
|
||||
|
||||
// CRITICAL FIX: Restore header BEFORE pushing to TLS SLL
|
||||
// Freelist blocks may have stale data at offset 0
|
||||
|
||||
@ -41,7 +41,7 @@ core/box/carve_push_box.o: core/box/carve_push_box.c \
|
||||
core/box/../tiny_region_id.h core/box/../hakmem_tiny_integrity.h \
|
||||
core/box/../box/slab_freelist_atomic.h core/box/tiny_header_box.h \
|
||||
core/box/../tiny_refill_opt.h core/box/../box/tls_sll_box.h \
|
||||
core/box/../tiny_box_geometry.h
|
||||
core/box/../tiny_box_geometry.h core/box/c7_meta_used_counter_box.h
|
||||
core/box/../hakmem_tiny.h:
|
||||
core/box/../hakmem_build_flags.h:
|
||||
core/box/../hakmem_trace.h:
|
||||
@ -116,3 +116,4 @@ core/box/tiny_header_box.h:
|
||||
core/box/../tiny_refill_opt.h:
|
||||
core/box/../box/tls_sll_box.h:
|
||||
core/box/../tiny_box_geometry.h:
|
||||
core/box/c7_meta_used_counter_box.h:
|
||||
|
||||
@ -9,12 +9,15 @@
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdatomic.h>
|
||||
#include "../hakmem_tiny_config.h"
|
||||
#include "../hakmem_tiny_superslab.h"
|
||||
#include "../superslab/superslab_inline.h"
|
||||
#include "../tiny_box_geometry.h"
|
||||
#include "../box/tiny_next_ptr_box.h"
|
||||
#include "../box/pagefault_telemetry_box.h"
|
||||
#include "c7_meta_used_counter_box.h"
|
||||
|
||||
// ============================================================================
|
||||
// Slab Carving API (Inline for Hot Path)
|
||||
@ -46,11 +49,31 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
|
||||
|
||||
// Find an available slab in this SuperSlab
|
||||
int cap = ss_slabs_capacity(ss);
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
static _Atomic int rel_c7_meta_logged = 0;
|
||||
TinySlabMeta* rel_c7_meta = NULL;
|
||||
int rel_c7_meta_idx = -1;
|
||||
#else
|
||||
static __thread int dbg_c7_meta_logged = 0;
|
||||
TinySlabMeta* dbg_c7_meta = NULL;
|
||||
int dbg_c7_meta_idx = -1;
|
||||
#endif
|
||||
for (int slab_idx = 0; slab_idx < cap; slab_idx++) {
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
|
||||
// Check if this slab matches our class and has capacity
|
||||
if (meta->class_idx != (uint8_t)class_idx) continue;
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7 && atomic_load_explicit(&rel_c7_meta_logged, memory_order_relaxed) == 0 && !rel_c7_meta) {
|
||||
rel_c7_meta = meta;
|
||||
rel_c7_meta_idx = slab_idx;
|
||||
}
|
||||
#else
|
||||
if (class_idx == 7 && dbg_c7_meta_logged == 0 && !dbg_c7_meta) {
|
||||
dbg_c7_meta = meta;
|
||||
dbg_c7_meta_idx = slab_idx;
|
||||
}
|
||||
#endif
|
||||
if (meta->used >= meta->capacity && !meta->freelist) continue;
|
||||
|
||||
// Carve blocks from this slab
|
||||
@ -73,6 +96,7 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
|
||||
|
||||
meta->freelist = next_node;
|
||||
meta->used++;
|
||||
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
|
||||
|
||||
} else if (meta->carved < meta->capacity) {
|
||||
// Linear carve
|
||||
@ -84,6 +108,7 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
|
||||
|
||||
meta->carved++;
|
||||
meta->used++;
|
||||
c7_meta_used_note(class_idx, C7_META_USED_SRC_FRONT);
|
||||
|
||||
} else {
|
||||
break; // This slab exhausted
|
||||
@ -99,6 +124,48 @@ static inline int slab_carve_from_ss(int class_idx, SuperSlab* ss,
|
||||
// If this slab had no freelist and no carved capacity, continue to next
|
||||
}
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
static __thread int dbg_c7_slab_carve_zero_logs = 0;
|
||||
if (class_idx == 7 && dbg_c7_slab_carve_zero_logs < 10) {
|
||||
fprintf(stderr, "[C7_SLAB_CARVE_ZERO] ss=%p no blocks carved\n", (void*)ss);
|
||||
dbg_c7_slab_carve_zero_logs++;
|
||||
}
|
||||
#endif
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7 &&
|
||||
atomic_load_explicit(&rel_c7_meta_logged, memory_order_relaxed) == 0 &&
|
||||
rel_c7_meta) {
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
fprintf(stderr,
|
||||
"[REL_C7_CARVE_META] ss=%p slab=%d cls=%u used=%u cap=%u carved=%u freelist=%p stride=%zu slabs_cap=%d\n",
|
||||
(void*)ss,
|
||||
rel_c7_meta_idx,
|
||||
(unsigned)rel_c7_meta->class_idx,
|
||||
(unsigned)rel_c7_meta->used,
|
||||
(unsigned)rel_c7_meta->capacity,
|
||||
(unsigned)rel_c7_meta->carved,
|
||||
rel_c7_meta->freelist,
|
||||
bs,
|
||||
cap);
|
||||
atomic_store_explicit(&rel_c7_meta_logged, 1, memory_order_relaxed);
|
||||
}
|
||||
#else
|
||||
if (class_idx == 7 && dbg_c7_meta_logged == 0 && dbg_c7_meta) {
|
||||
size_t bs = tiny_stride_for_class(class_idx);
|
||||
fprintf(stderr,
|
||||
"[DBG_C7_CARVE_META] ss=%p slab=%d cls=%u used=%u cap=%u carved=%u freelist=%p stride=%zu slabs_cap=%d\n",
|
||||
(void*)ss,
|
||||
dbg_c7_meta_idx,
|
||||
(unsigned)dbg_c7_meta->class_idx,
|
||||
(unsigned)dbg_c7_meta->used,
|
||||
(unsigned)dbg_c7_meta->capacity,
|
||||
(unsigned)dbg_c7_meta->carved,
|
||||
dbg_c7_meta->freelist,
|
||||
bs,
|
||||
cap);
|
||||
dbg_c7_meta_logged = 1;
|
||||
}
|
||||
#endif
|
||||
return 0; // No slab in this SuperSlab had available capacity
|
||||
}
|
||||
|
||||
|
||||
26
core/box/ss_slab_reset_box.h
Normal file
26
core/box/ss_slab_reset_box.h
Normal file
@ -0,0 +1,26 @@
|
||||
// ss_slab_reset_box.h
|
||||
// Box: Reset TinySlabMeta for reuse (C7 diagnostics-friendly)
|
||||
#pragma once
|
||||
|
||||
#include "ss_slab_meta_box.h"
|
||||
#include "../superslab/superslab_inline.h"
|
||||
#include <stdatomic.h>
|
||||
|
||||
// Reset the metadata of slab `slab_idx` inside `ss` and retag it for
// `class_idx`.
//
// Does nothing when `ss` is NULL or `slab_idx` is outside
// [0, ss_slabs_capacity(ss)). Otherwise clears used/carved/freelist, writes
// the class (truncated to uint8_t) to both the slab meta and ss->class_map,
// and zeroes the slab's remote head and remote count with relaxed stores.
static inline void ss_slab_reset_meta_for_tiny(SuperSlab* ss,
                                               int slab_idx,
                                               int class_idx)
{
    if (ss == NULL) {
        return;
    }
    if (slab_idx < 0) {
        return;
    }
    if (slab_idx >= ss_slabs_capacity(ss)) {
        return;
    }

    const uint8_t cls = (uint8_t)class_idx;
    TinySlabMeta* const slot = &ss->slabs[slab_idx];

    slot->used = 0;
    slot->carved = 0;
    slot->freelist = NULL;
    slot->class_idx = cls;
    ss->class_map[slab_idx] = cls;

    // Discard remote-queue state so nothing stale is pending after reuse.
    atomic_store_explicit(&ss->remote_heads[slab_idx], 0, memory_order_relaxed);
    atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_relaxed);
}
|
||||
@ -13,6 +13,7 @@
|
||||
#include "../hakmem_tiny_config.h"
|
||||
#include "../box/tiny_page_box.h" // For tiny_page_box_on_new_slab()
|
||||
#include <stdio.h>
|
||||
#include <stdatomic.h>
|
||||
|
||||
// Forward declaration if not included
|
||||
// CRITICAL FIX: type must match core/hakmem_tiny_config.h (const size_t, not uint16_t)
|
||||
@ -64,9 +65,7 @@ static inline int ss_tls_bind_one(int class_idx,
|
||||
// superslab_init_slab() only sets it if meta->class_idx==255.
|
||||
// We must explicitly set it to the requested class to avoid C0/C7 confusion.
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
uint8_t old_cls = meta->class_idx;
|
||||
#endif
|
||||
meta->class_idx = (uint8_t)class_idx;
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7 && old_cls != class_idx) {
|
||||
@ -75,6 +74,36 @@ static inline int ss_tls_bind_one(int class_idx,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
static _Atomic int rel_c7_bind_logged = 0;
|
||||
if (class_idx == 7 &&
|
||||
atomic_load_explicit(&rel_c7_bind_logged, memory_order_relaxed) == 0) {
|
||||
fprintf(stderr,
|
||||
"[REL_C7_BIND] ss=%p slab=%d cls=%u cap=%u used=%u carved=%u\n",
|
||||
(void*)ss,
|
||||
slab_idx,
|
||||
(unsigned)meta->class_idx,
|
||||
(unsigned)meta->capacity,
|
||||
(unsigned)meta->used,
|
||||
(unsigned)meta->carved);
|
||||
atomic_store_explicit(&rel_c7_bind_logged, 1, memory_order_relaxed);
|
||||
}
|
||||
#else
|
||||
static __thread int dbg_c7_bind_logged = 0;
|
||||
if (class_idx == 7 && dbg_c7_bind_logged == 0) {
|
||||
fprintf(stderr,
|
||||
"[DBG_C7_BIND] ss=%p slab=%d old_cls=%u new_cls=%u cap=%u used=%u carved=%u\n",
|
||||
(void*)ss,
|
||||
slab_idx,
|
||||
(unsigned)old_cls,
|
||||
(unsigned)meta->class_idx,
|
||||
(unsigned)meta->capacity,
|
||||
(unsigned)meta->used,
|
||||
(unsigned)meta->carved);
|
||||
dbg_c7_bind_logged = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Bind this slab to TLS for fast subsequent allocations.
|
||||
// Inline implementation of tiny_tls_bind_slab() to avoid header dependencies.
|
||||
// Original logic:
|
||||
@ -109,4 +138,4 @@ static inline int ss_tls_bind_one(int class_idx,
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // HAK_SS_TLS_BIND_BOX_H
|
||||
#endif // HAK_SS_TLS_BIND_BOX_H
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Default: conservative profile (all classes TINY_FIRST).
|
||||
// This keeps Tiny in the fast path but always allows Pool fallback.
|
||||
@ -40,5 +41,16 @@ void tiny_route_init(void)
|
||||
// - 全クラス TINY_FIRST(Tiny を使うが必ず Pool fallbackあり)
|
||||
memset(g_tiny_route, ROUTE_TINY_FIRST, sizeof(g_tiny_route));
|
||||
}
|
||||
}
|
||||
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
static int rel_logged = 0;
|
||||
if (!rel_logged) {
|
||||
const char* mode =
|
||||
(g_tiny_route[7] == ROUTE_TINY_ONLY) ? "TINY_ONLY" :
|
||||
(g_tiny_route[7] == ROUTE_TINY_FIRST) ? "TINY_FIRST" :
|
||||
(g_tiny_route[7] == ROUTE_POOL_ONLY) ? "POOL_ONLY" : "UNKNOWN";
|
||||
fprintf(stderr, "[REL_C7_ROUTE] profile=%s route=%s\n", profile, mode);
|
||||
rel_logged = 1;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -19,6 +19,7 @@
|
||||
#define TINY_ROUTE_BOX_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// Routing policy per Tiny class.
|
||||
typedef enum {
|
||||
@ -43,8 +44,21 @@ void tiny_route_init(void);
|
||||
// Uses simple array lookup; class_idx is masked to [0,7] defensively.
|
||||
static inline TinyRoutePolicy tiny_route_get(int class_idx)
|
||||
{
|
||||
return (TinyRoutePolicy)g_tiny_route[class_idx & 7];
|
||||
TinyRoutePolicy p = (TinyRoutePolicy)g_tiny_route[class_idx & 7];
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if ((class_idx & 7) == 7) {
|
||||
static int rel_route_logged = 0;
|
||||
if (!rel_route_logged) {
|
||||
const char* mode =
|
||||
(p == ROUTE_TINY_ONLY) ? "TINY_ONLY" :
|
||||
(p == ROUTE_TINY_FIRST) ? "TINY_FIRST" :
|
||||
(p == ROUTE_POOL_ONLY) ? "POOL_ONLY" : "UNKNOWN";
|
||||
fprintf(stderr, "[REL_C7_ROUTE] via tiny_route_get route=%s\n", mode);
|
||||
rel_route_logged = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
|
||||
#endif // TINY_ROUTE_BOX_H
|
||||
|
||||
|
||||
102
core/box/tiny_tls_carve_one_block_box.h
Normal file
102
core/box/tiny_tls_carve_one_block_box.h
Normal file
@ -0,0 +1,102 @@
|
||||
// tiny_tls_carve_one_block_box.h
|
||||
// Box: Shared TLS carve helper (linear or freelist) for Tiny classes.
|
||||
#pragma once
|
||||
|
||||
#include "../tiny_tls.h"
|
||||
#include "../tiny_box_geometry.h"
|
||||
#include "../tiny_debug_api.h" // tiny_refill_failfast_level(), tiny_failfast_abort_ptr()
|
||||
#include "c7_meta_used_counter_box.h" // C7 meta->used telemetry (Release/Debug共通)
|
||||
#include "tiny_next_ptr_box.h"
|
||||
#include "../superslab/superslab_inline.h"
|
||||
#include <stdatomic.h>
|
||||
#include <signal.h>
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
extern int g_tiny_safe_free;
|
||||
extern int g_tiny_safe_free_strict;
|
||||
#endif
|
||||
|
||||
// Values stored in TinyTLSCarveOneResult.path; numbered implicitly from 0.
enum {
    TINY_TLS_CARVE_PATH_NONE,      // 0: nothing was carved
    TINY_TLS_CARVE_PATH_LINEAR,    // 1: block taken by linear carve
    TINY_TLS_CARVE_PATH_FREELIST   // 2: block popped from the slab freelist
};
|
||||
|
||||
// Outcome of tiny_tls_carve_one_block().
typedef struct TinyTLSCarveOneResult {
    void *block;  // carved block, or NULL when nothing could be carved
    int   path;   // one of the TINY_TLS_CARVE_PATH_* values
} TinyTLSCarveOneResult;
|
||||
|
||||
// Carve one block from the current TLS slab.
|
||||
// Returns .block == NULL on failure. path describes which sub-path was taken.
|
||||
static inline TinyTLSCarveOneResult
|
||||
tiny_tls_carve_one_block(TinyTLSSlab* tls, int class_idx)
|
||||
{
|
||||
TinyTLSCarveOneResult res = {.block = NULL, .path = TINY_TLS_CARVE_PATH_NONE};
|
||||
|
||||
if (!tls) return res;
|
||||
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
if (!meta || !tls->ss || tls->slab_base == NULL) return res;
|
||||
if (meta->class_idx != (uint8_t)class_idx) return res;
|
||||
if (tls->slab_idx < 0 || tls->slab_idx >= ss_slabs_capacity(tls->ss)) return res;
|
||||
|
||||
// Freelist pop
|
||||
if (meta->freelist) {
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||||
size_t blk = tiny_stride_for_class(meta->class_idx);
|
||||
uint8_t* base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
|
||||
uintptr_t delta = (uintptr_t)meta->freelist - (uintptr_t)base;
|
||||
int align_ok = ((delta % blk) == 0);
|
||||
int range_ok = (delta / blk) < meta->capacity;
|
||||
if (!align_ok || !range_ok) {
|
||||
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return res; }
|
||||
return res;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void* block = meta->freelist;
|
||||
meta->freelist = tiny_next_read(class_idx, block);
|
||||
meta->used++;
|
||||
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
|
||||
ss_active_add(tls->ss, 1);
|
||||
res.block = block;
|
||||
res.path = TINY_TLS_CARVE_PATH_FREELIST;
|
||||
return res;
|
||||
}
|
||||
|
||||
// Linear carve
|
||||
if (meta->used < meta->capacity) {
|
||||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||||
void* block = tiny_block_at_index(tls->slab_base, meta->used, block_size);
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
uintptr_t base_ss = (uintptr_t)tls->ss;
|
||||
size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
|
||||
uintptr_t p = (uintptr_t)block;
|
||||
int in_range = (p >= base_ss) && (p < base_ss + ss_size);
|
||||
int aligned = ((p - (uintptr_t)tls->slab_base) % block_size) == 0;
|
||||
int idx_ok = (tls->slab_idx >= 0) &&
|
||||
(tls->slab_idx < ss_slabs_capacity(tls->ss));
|
||||
if (!in_range || !aligned || !idx_ok || meta->used + 1 > meta->capacity) {
|
||||
tiny_failfast_abort_ptr("tls_carve_align",
|
||||
tls->ss,
|
||||
tls->slab_idx,
|
||||
block,
|
||||
"tiny_tls_carve_one_block");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
meta->used++;
|
||||
c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_TLS);
|
||||
ss_active_add(tls->ss, 1);
|
||||
res.block = block;
|
||||
res.path = TINY_TLS_CARVE_PATH_LINEAR;
|
||||
return res;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
121
core/box/warm_pool_dbg_box.h
Normal file
121
core/box/warm_pool_dbg_box.h
Normal file
@ -0,0 +1,121 @@
|
||||
// warm_pool_dbg_box.h
|
||||
// Box: Debug-only counters for C7 Warm Pool instrumentation.
|
||||
#pragma once
|
||||
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug counters, declared for every includer.
extern _Atomic uint64_t g_dbg_c7_warm_pop_attempts;
extern _Atomic uint64_t g_dbg_c7_warm_pop_hits;
extern _Atomic uint64_t g_dbg_c7_warm_pop_carve;
extern _Atomic uint64_t g_dbg_c7_tls_carve_attempts;
extern _Atomic uint64_t g_dbg_c7_tls_carve_success;
extern _Atomic uint64_t g_dbg_c7_tls_carve_fail;
extern _Atomic uint64_t g_dbg_c7_uc_miss_warm_refill;
extern _Atomic uint64_t g_dbg_c7_uc_miss_tls_refill;
extern _Atomic uint64_t g_dbg_c7_uc_miss_shared_refill;

// Storage is emitted only by an includer that defines WARM_POOL_DBG_DEFINE.
#if defined(WARM_POOL_DBG_DEFINE)
_Atomic uint64_t g_dbg_c7_warm_pop_attempts = 0;
_Atomic uint64_t g_dbg_c7_warm_pop_hits = 0;
_Atomic uint64_t g_dbg_c7_warm_pop_carve = 0;
_Atomic uint64_t g_dbg_c7_tls_carve_attempts = 0;
_Atomic uint64_t g_dbg_c7_tls_carve_success = 0;
_Atomic uint64_t g_dbg_c7_tls_carve_fail = 0;
_Atomic uint64_t g_dbg_c7_uc_miss_warm_refill = 0;
_Atomic uint64_t g_dbg_c7_uc_miss_tls_refill = 0;
_Atomic uint64_t g_dbg_c7_uc_miss_shared_refill = 0;
#endif
|
||||
|
||||
static inline void warm_pool_dbg_c7_attempt(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_warm_pop_attempts, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_hit(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_warm_pop_hits, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_carve(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_warm_pop_carve, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_tls_attempt(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_tls_carve_attempts, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_tls_success(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_tls_carve_success, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_tls_fail(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_tls_carve_fail, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_uc_miss_warm(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_uc_miss_warm_refill, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_uc_miss_tls(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_uc_miss_tls_refill, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline void warm_pool_dbg_c7_uc_miss_shared(void) {
|
||||
atomic_fetch_add_explicit(&g_dbg_c7_uc_miss_shared_refill, 1, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_attempts(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_warm_pop_attempts, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_hits(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_warm_pop_hits, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_carves(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_warm_pop_carve, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_tls_attempts(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_tls_carve_attempts, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_tls_successes(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_tls_carve_success, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_tls_failures(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_tls_carve_fail, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_uc_miss_warm_refills(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_uc_miss_warm_refill, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_uc_miss_tls_refills(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_uc_miss_tls_refill, memory_order_relaxed);
|
||||
}
|
||||
|
||||
static inline uint64_t warm_pool_dbg_c7_uc_miss_shared_refills(void) {
|
||||
return atomic_load_explicit(&g_dbg_c7_uc_miss_shared_refill, memory_order_relaxed);
|
||||
}
|
||||
#else
|
||||
// Release build: the recorders compile to nothing.
static inline void warm_pool_dbg_c7_attempt(void) {}
static inline void warm_pool_dbg_c7_hit(void) {}
static inline void warm_pool_dbg_c7_carve(void) {}
static inline void warm_pool_dbg_c7_tls_attempt(void) {}
static inline void warm_pool_dbg_c7_tls_success(void) {}
static inline void warm_pool_dbg_c7_tls_fail(void) {}
static inline void warm_pool_dbg_c7_uc_miss_warm(void) {}
static inline void warm_pool_dbg_c7_uc_miss_tls(void) {}
static inline void warm_pool_dbg_c7_uc_miss_shared(void) {}

// Release build: every getter reports zero.
static inline uint64_t warm_pool_dbg_c7_attempts(void)               { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_hits(void)                   { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_carves(void)                 { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_tls_attempts(void)           { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_tls_successes(void)          { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_tls_failures(void)           { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_uc_miss_warm_refills(void)   { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_uc_miss_tls_refills(void)    { return (uint64_t)0; }
static inline uint64_t warm_pool_dbg_c7_uc_miss_shared_refills(void) { return (uint64_t)0; }
|
||||
#endif
|
||||
@ -7,11 +7,51 @@
|
||||
#define HAK_WARM_POOL_PREFILL_BOX_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdio.h>
|
||||
#include "../hakmem_tiny_config.h"
|
||||
#include "../hakmem_tiny_superslab.h"
|
||||
#include "../tiny_tls.h"
|
||||
#include "../front/tiny_warm_pool.h"
|
||||
#include "../box/warm_pool_stats_box.h"
|
||||
#include "../box/warm_pool_rel_counters_box.h"
|
||||
|
||||
// Print the metadata of the slab currently bound to `tls` to stderr.
//
// tag: text inserted into the line prefix ("[REL_C7_<tag>]" in Release
//      builds, "[DBG_C7_<tag>]" otherwise).
// tls: TLS slab slot; the call is a no-op when it is NULL or has no
//      superslab.
//
// A static atomic call counter is incremented on every qualifying call and a
// line is emitted only while the pre-increment value is below 4.
static inline void warm_prefill_log_c7_meta(const char* tag, TinyTLSSlab* tls) {
    if (tls == NULL || tls->ss == NULL) {
        return;
    }
#if HAKMEM_BUILD_RELEASE
    static _Atomic uint32_t rel_logs = 0;
    const uint32_t seen = atomic_fetch_add_explicit(&rel_logs, 1, memory_order_relaxed);
    if (seen >= 4) {
        return;
    }
    TinySlabMeta* slot = &tls->ss->slabs[tls->slab_idx];
    fprintf(stderr,
            "[REL_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p\n",
            tag,
            (void*)tls->ss,
            (unsigned)tls->slab_idx,
            (unsigned)slot->class_idx,
            (unsigned)slot->used,
            (unsigned)slot->capacity,
            (unsigned)slot->carved,
            slot->freelist);
#else
    static _Atomic uint32_t dbg_logs = 0;
    const uint32_t seen = atomic_fetch_add_explicit(&dbg_logs, 1, memory_order_relaxed);
    if (seen >= 4) {
        return;
    }
    TinySlabMeta* slot = &tls->ss->slabs[tls->slab_idx];
    fprintf(stderr,
            "[DBG_C7_%s] ss=%p slab=%u cls=%u used=%u cap=%u carved=%u freelist=%p\n",
            tag,
            (void*)tls->ss,
            (unsigned)tls->slab_idx,
            (unsigned)slot->class_idx,
            (unsigned)slot->used,
            (unsigned)slot->capacity,
            (unsigned)slot->carved,
            slot->freelist);
#endif
}
|
||||
|
||||
// Forward declarations
|
||||
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
|
||||
@ -45,9 +85,17 @@ extern SuperSlab* superslab_refill(int class_idx);
|
||||
// Performance: Only triggered when pool is empty, cold path cost
|
||||
//
|
||||
static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_rel_c7_prefill_call();
|
||||
}
|
||||
#endif
|
||||
int budget = (tiny_warm_pool_count(class_idx) == 0) ? WARM_POOL_PREFILL_BUDGET : 1;
|
||||
|
||||
while (budget > 0) {
|
||||
if (class_idx == 7) {
|
||||
warm_prefill_log_c7_meta("PREFILL_META", tls);
|
||||
}
|
||||
if (!tls->ss) {
|
||||
// Need to load a new SuperSlab
|
||||
if (!superslab_refill(class_idx)) {
|
||||
@ -61,16 +109,75 @@ static inline int warm_pool_do_prefill(int class_idx, TinyTLSSlab* tls) {
|
||||
break;
|
||||
}
|
||||
|
||||
// C7 safety: prefer only pristine slabs (used=0 carved=0 freelist=NULL)
|
||||
if (class_idx == 7) {
|
||||
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
|
||||
if (meta->class_idx == 7 &&
|
||||
(meta->used > 0 || meta->carved > 0 || meta->freelist != NULL)) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
static _Atomic int rel_c7_skip_logged = 0;
|
||||
if (atomic_load_explicit(&rel_c7_skip_logged, memory_order_relaxed) == 0) {
|
||||
fprintf(stderr,
|
||||
"[REL_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
|
||||
(void*)tls->ss,
|
||||
(unsigned)tls->slab_idx,
|
||||
(unsigned)meta->used,
|
||||
(unsigned)meta->capacity,
|
||||
(unsigned)meta->carved,
|
||||
meta->freelist);
|
||||
atomic_store_explicit(&rel_c7_skip_logged, 1, memory_order_relaxed);
|
||||
}
|
||||
#else
|
||||
static __thread int dbg_c7_skip_logged = 0;
|
||||
if (dbg_c7_skip_logged < 4) {
|
||||
fprintf(stderr,
|
||||
"[DBG_C7_PREFILL_SKIP_NONEMPTY] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
|
||||
(void*)tls->ss,
|
||||
(unsigned)tls->slab_idx,
|
||||
(unsigned)meta->used,
|
||||
(unsigned)meta->capacity,
|
||||
(unsigned)meta->carved,
|
||||
meta->freelist);
|
||||
dbg_c7_skip_logged++;
|
||||
}
|
||||
#endif
|
||||
tls->ss = NULL; // Drop exhausted slab and try another
|
||||
budget--;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (budget > 1) {
|
||||
// Prefill mode: push to pool and load another
|
||||
tiny_warm_pool_push(class_idx, tls->ss);
|
||||
warm_pool_record_prefilled(class_idx);
|
||||
tls->ss = NULL; // Force next iteration to refill
|
||||
budget--;
|
||||
} else {
|
||||
// Final slab: keep in TLS for immediate carving
|
||||
budget = 0;
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
if (class_idx == 7) {
|
||||
warm_pool_rel_c7_prefill_slab();
|
||||
}
|
||||
#else
|
||||
if (class_idx == 7) {
|
||||
static __thread int dbg_c7_prefill_logs = 0;
|
||||
if (dbg_c7_prefill_logs < 8) {
|
||||
TinySlabMeta* meta = &tls->ss->slabs[tls->slab_idx];
|
||||
fprintf(stderr,
|
||||
"[DBG_C7_PREFILL] ss=%p slab=%u used=%u cap=%u carved=%u freelist=%p\n",
|
||||
(void*)tls->ss,
|
||||
(unsigned)tls->slab_idx,
|
||||
(unsigned)meta->used,
|
||||
(unsigned)meta->capacity,
|
||||
(unsigned)meta->carved,
|
||||
meta->freelist);
|
||||
dbg_c7_prefill_logs++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
tls->ss = NULL; // Force next iteration to refill
|
||||
budget--;
|
||||
} else {
|
||||
// Final slab: keep in TLS for immediate carving
|
||||
budget = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0; // Success
|
||||
|
||||
64
core/box/warm_pool_rel_counters_box.h
Normal file
64
core/box/warm_pool_rel_counters_box.h
Normal file
@ -0,0 +1,64 @@
|
||||
// warm_pool_rel_counters_box.h
|
||||
// Box: Lightweight Release-side counters for C7 Warm/TLS instrumentation.
|
||||
#pragma once
|
||||
|
||||
#include <stdatomic.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
// Release counters, declared for every includer.
extern _Atomic uint64_t g_rel_c7_carve_attempts;
extern _Atomic uint64_t g_rel_c7_carve_success;
extern _Atomic uint64_t g_rel_c7_carve_zero;
extern _Atomic uint64_t g_rel_c7_warm_prefill_calls;
extern _Atomic uint64_t g_rel_c7_warm_prefill_slabs;

// Storage is emitted only by an includer that defines WARM_POOL_REL_DEFINE.
#if defined(WARM_POOL_REL_DEFINE)
_Atomic uint64_t g_rel_c7_carve_attempts = 0;
_Atomic uint64_t g_rel_c7_carve_success = 0;
_Atomic uint64_t g_rel_c7_carve_zero = 0;
_Atomic uint64_t g_rel_c7_warm_prefill_calls = 0;
_Atomic uint64_t g_rel_c7_warm_prefill_slabs = 0;
#endif
|
||||
|
||||
static inline void warm_pool_rel_c7_carve_attempt(void) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_carve_attempts, 1, memory_order_relaxed);
|
||||
}
|
||||
static inline void warm_pool_rel_c7_carve_success(void) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_carve_success, 1, memory_order_relaxed);
|
||||
}
|
||||
static inline void warm_pool_rel_c7_carve_zero(void) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_carve_zero, 1, memory_order_relaxed);
|
||||
}
|
||||
static inline void warm_pool_rel_c7_prefill_call(void) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_warm_prefill_calls, 1, memory_order_relaxed);
|
||||
}
|
||||
static inline void warm_pool_rel_c7_prefill_slab(void) {
|
||||
atomic_fetch_add_explicit(&g_rel_c7_warm_prefill_slabs, 1, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t warm_pool_rel_c7_carve_attempts(void) {
|
||||
return atomic_load_explicit(&g_rel_c7_carve_attempts, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t warm_pool_rel_c7_carve_successes(void) {
|
||||
return atomic_load_explicit(&g_rel_c7_carve_success, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t warm_pool_rel_c7_carve_zeroes(void) {
|
||||
return atomic_load_explicit(&g_rel_c7_carve_zero, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t warm_pool_rel_c7_prefill_calls(void) {
|
||||
return atomic_load_explicit(&g_rel_c7_warm_prefill_calls, memory_order_relaxed);
|
||||
}
|
||||
static inline uint64_t warm_pool_rel_c7_prefill_slabs(void) {
|
||||
return atomic_load_explicit(&g_rel_c7_warm_prefill_slabs, memory_order_relaxed);
|
||||
}
|
||||
#else
|
||||
// Non-Release build: the recorders compile to nothing.
static inline void warm_pool_rel_c7_carve_attempt(void) {}
static inline void warm_pool_rel_c7_carve_success(void) {}
static inline void warm_pool_rel_c7_carve_zero(void) {}
static inline void warm_pool_rel_c7_prefill_call(void) {}
static inline void warm_pool_rel_c7_prefill_slab(void) {}

// Non-Release build: every getter reports zero.
static inline uint64_t warm_pool_rel_c7_carve_attempts(void)  { return (uint64_t)0; }
static inline uint64_t warm_pool_rel_c7_carve_successes(void) { return (uint64_t)0; }
static inline uint64_t warm_pool_rel_c7_carve_zeroes(void)    { return (uint64_t)0; }
static inline uint64_t warm_pool_rel_c7_prefill_calls(void)   { return (uint64_t)0; }
static inline uint64_t warm_pool_rel_c7_prefill_slabs(void)   { return (uint64_t)0; }
|
||||
#endif
|
||||
57
core/box/warm_tls_bind_logger_box.h
Normal file
57
core/box/warm_tls_bind_logger_box.h
Normal file
@ -0,0 +1,57 @@
|
||||
// warm_tls_bind_logger_box.h
|
||||
// Box: Warm TLS Bind experiment logging with simple throttling.
|
||||
#pragma once
|
||||
|
||||
#include "../hakmem_tiny_superslab.h"
|
||||
#include <stdatomic.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Throttle state for the [WARM_TLS_BIND] log lines. Both objects are `static`
// in a header, so each translation unit that includes it has its own copy.
static _Atomic int g_warm_tls_bind_log_limit = -1;  // negative: env not read yet
static _Atomic int g_warm_tls_bind_log_count = 0;   // log slots handed out so far

// Maximum number of log lines allowed.
//
// Read once from HAKMEM_WARM_TLS_BIND_LOG_MAX (default 1 when unset or
// empty) and cached. The value is parsed with strtol and clamped to
// [0, INT_MAX]:
//  - out-of-range text no longer hits atoi's undefined behaviour;
//  - a negative setting (previously "no logs", with -1 additionally
//    colliding with the "not initialised" sentinel and re-running getenv on
//    every call) is stored as 0, i.e. logging disabled;
//  - non-numeric text still yields 0, as before.
static inline int warm_tls_bind_log_limit(void) {
    int limit = atomic_load_explicit(&g_warm_tls_bind_log_limit, memory_order_relaxed);
    if (__builtin_expect(limit >= 0, 1)) {
        return limit;
    }

    long parsed = 1;
    const char* e = getenv("HAKMEM_WARM_TLS_BIND_LOG_MAX");
    if (e != NULL && *e != '\0') {
        // INT_MAX without <limits.h>: all value bits of unsigned, shifted by one.
        const long int_max = (long)(~0u >> 1);
        parsed = strtol(e, NULL, 10);
        if (parsed < 0) {
            parsed = 0;
        } else if (parsed > int_max) {
            parsed = int_max;
        }
    }

    limit = (int)parsed;
    atomic_store_explicit(&g_warm_tls_bind_log_limit, limit, memory_order_relaxed);
    return limit;
}

// Try to claim one log slot. Returns non-zero if the caller may log.
//
// Once the count has reached the limit the function returns 0 without
// touching the counter, so the counter stops growing (it can no longer wrap
// around and re-enable logging) and exhausted callers do no atomic write.
static inline int warm_tls_bind_log_acquire(void) {
    const int limit = warm_tls_bind_log_limit();
    if (atomic_load_explicit(&g_warm_tls_bind_log_count, memory_order_relaxed) >= limit) {
        return 0;
    }
    const int prev = atomic_fetch_add_explicit(&g_warm_tls_bind_log_count, 1, memory_order_relaxed);
    return prev < limit;
}
|
||||
|
||||
// Each emitter below prints one line to stderr, but only when
// warm_tls_bind_log_acquire() grants a log slot.

// Reports a successful bind of (ss, slab_idx).
static inline void warm_tls_bind_log_success(SuperSlab* ss, int slab_idx) {
    if (!warm_tls_bind_log_acquire()) {
        return;
    }
    fprintf(stderr, "[WARM_TLS_BIND] C7 bind success: ss=%p slab=%d\n",
            (void*)ss, slab_idx);
}

// Reports a successful TLS carve of `block` from (ss, slab_idx).
static inline void warm_tls_bind_log_tls_carve(SuperSlab* ss, int slab_idx, void* block) {
    if (!warm_tls_bind_log_acquire()) {
        return;
    }
    fprintf(stderr,
            "[WARM_TLS_BIND] C7 TLS carve success: ss=%p slab=%d block=%p\n",
            (void*)ss, slab_idx, block);
}

// Reports a failed TLS carve on (ss, slab_idx).
static inline void warm_tls_bind_log_tls_fail(SuperSlab* ss, int slab_idx) {
    if (!warm_tls_bind_log_acquire()) {
        return;
    }
    fprintf(stderr,
            "[WARM_TLS_BIND] C7 TLS carve failed, fallback (ss=%p slab=%d)\n",
            (void*)ss, slab_idx);
}
|
||||
#else
|
||||
// Release build: logging helpers are no-ops; arguments are consumed only to
// silence unused-parameter warnings.
static inline void warm_tls_bind_log_success(SuperSlab* ss, int slab_idx)
{
    (void)ss;
    (void)slab_idx;
}

static inline void warm_tls_bind_log_tls_carve(SuperSlab* ss, int slab_idx, void* block)
{
    (void)ss;
    (void)slab_idx;
    (void)block;
}

static inline void warm_tls_bind_log_tls_fail(SuperSlab* ss, int slab_idx)
{
    (void)ss;
    (void)slab_idx;
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user