Add callsite tracking for tls_sll_push/pop (macro-based Box Theory)
Problem:
- [TLS_SLL_PUSH_DUP] fired at 225K iterations, but the bypass path could not be identified
- Both push AND pop callsites are needed to diagnose the reuse-before-pop bug

Implementation (Box Theory):
- Renamed tls_sll_push → tls_sll_push_impl (with `where` parameter)
- Renamed tls_sll_pop → tls_sll_pop_impl (with `where` parameter)
- Added macro wrappers with __func__ auto-insertion
- Zero changes to 40+ call sites (Box boundary preserved)

Debug-only tracking:
- All tracking code is wrapped in #if !HAKMEM_BUILD_RELEASE
- Release builds: where=NULL, zero overhead
- Arrays: s_tls_sll_last_push_from[], s_tls_sll_last_pop_from[]

New log format:
  [TLS_SLL_PUSH_DUP] cls=5 ptr=0x...
    last_push_from=hak_tiny_free_fast_v2
    last_pop_from=(null) ← SMOKING GUN!
    where=hak_tiny_free_fast_v2

Decisive Evidence:
- ✅ last_pop_from=(null) proves the TLS SLL was never popped
- ✅ Unified Cache bypasses the TLS SLL (confirmed by Task agent)
- ✅ Root cause: unified_cache_refill() carves directly from SuperSlab

Impact:
- Complete push/pop flow tracking (debug builds only)
- Root cause identified: Unified Cache at Line 289
- Next step: fix unified_cache_refill() to check the TLS SLL first

Credit: Box Theory macro pattern suggested by ChatGPT

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -39,6 +39,12 @@
|
|||||||
// Per-thread debug shadow: last successful push base per class (release-safe)
|
// Per-thread debug shadow: last successful push base per class (release-safe)
|
||||||
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
|
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
|
||||||
|
|
||||||
|
// Per-thread callsite tracking: last push caller per class (debug-only)
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
static __thread const char* s_tls_sll_last_push_from[TINY_NUM_CLASSES] = {NULL};
|
||||||
|
static __thread const char* s_tls_sll_last_pop_from[TINY_NUM_CLASSES] = {NULL};
|
||||||
|
#endif
|
||||||
|
|
||||||
// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
|
// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
|
||||||
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
|
||||||
extern __thread uint64_t g_tls_canary_before_sll;
|
extern __thread uint64_t g_tls_canary_before_sll;
|
||||||
@ -221,8 +227,11 @@ static inline void tls_sll_diag_next(int class_idx, void* base, void* next, cons
|
|||||||
//
|
//
|
||||||
// Push BASE pointer into TLS SLL for given class.
|
// Push BASE pointer into TLS SLL for given class.
|
||||||
// Returns true on success, false if capacity full or input invalid.
|
// Returns true on success, false if capacity full or input invalid.
|
||||||
|
//
|
||||||
|
// Implementation function with callsite tracking (where).
|
||||||
|
// Use tls_sll_push() macro instead of calling directly.
|
||||||
|
|
||||||
static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
|
static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity, const char* where)
|
||||||
{
|
{
|
||||||
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
|
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
|
||||||
|
|
||||||
@ -356,14 +365,17 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
|
|||||||
while (scan && scanned < limit) {
|
while (scan && scanned < limit) {
|
||||||
if (scan == ptr) {
|
if (scan == ptr) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_writer=%s\n",
|
"[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
|
||||||
class_idx,
|
class_idx,
|
||||||
ptr,
|
ptr,
|
||||||
g_tls_sll[class_idx].head,
|
g_tls_sll[class_idx].head,
|
||||||
g_tls_sll[class_idx].count,
|
g_tls_sll[class_idx].count,
|
||||||
scanned,
|
scanned,
|
||||||
s_tls_sll_last_push[class_idx],
|
s_tls_sll_last_push[class_idx],
|
||||||
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
|
s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
|
||||||
|
s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
|
||||||
|
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
|
||||||
|
where ? where : "(null)");
|
||||||
ptr_trace_dump_now("tls_sll_dup");
|
ptr_trace_dump_now("tls_sll_dup");
|
||||||
// Treat as already free; do not push again.
|
// Treat as already free; do not push again.
|
||||||
return true;
|
return true;
|
||||||
@ -383,6 +395,13 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
|
|||||||
g_tls_sll[class_idx].count = cur + 1;
|
g_tls_sll[class_idx].count = cur + 1;
|
||||||
s_tls_sll_last_push[class_idx] = ptr;
|
s_tls_sll_last_push[class_idx] = ptr;
|
||||||
|
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
// Record callsite for debugging (debug-only)
|
||||||
|
s_tls_sll_last_push_from[class_idx] = where;
|
||||||
|
#else
|
||||||
|
(void)where; // Suppress unused warning in release
|
||||||
|
#endif
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -390,8 +409,11 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
|
|||||||
//
|
//
|
||||||
// Pop BASE pointer from TLS SLL.
|
// Pop BASE pointer from TLS SLL.
|
||||||
// Returns true on success and stores BASE into *out.
|
// Returns true on success and stores BASE into *out.
|
||||||
|
//
|
||||||
|
// Implementation function with callsite tracking (where).
|
||||||
|
// Use tls_sll_pop() macro instead of calling directly.
|
||||||
|
|
||||||
static inline bool tls_sll_pop(int class_idx, void** out)
|
static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where)
|
||||||
{
|
{
|
||||||
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
|
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
|
||||||
// Class mask gate: if disallowed, behave as empty
|
// Class mask gate: if disallowed, behave as empty
|
||||||
@ -552,6 +574,13 @@ static inline bool tls_sll_pop(int class_idx, void** out)
|
|||||||
// Clear next inside popped node to avoid stale-chain issues.
|
// Clear next inside popped node to avoid stale-chain issues.
|
||||||
tiny_next_write(class_idx, base, NULL);
|
tiny_next_write(class_idx, base, NULL);
|
||||||
|
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
// Record callsite for debugging (debug-only)
|
||||||
|
s_tls_sll_last_pop_from[class_idx] = where;
|
||||||
|
#else
|
||||||
|
(void)where; // Suppress unused warning in release
|
||||||
|
#endif
|
||||||
|
|
||||||
*out = base;
|
*out = base;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -638,4 +667,21 @@ static inline uint32_t tls_sll_splice(int class_idx,
|
|||||||
return moved;
|
return moved;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ========== Macro Wrappers ==========
|
||||||
|
//
|
||||||
|
// Box Theory: Callers use tls_sll_push/pop() macros which auto-insert __func__.
|
||||||
|
// No changes are required at existing call sites.
|
||||||
|
|
||||||
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
|
# define tls_sll_push(cls, ptr, cap) \
|
||||||
|
tls_sll_push_impl((cls), (ptr), (cap), __func__)
|
||||||
|
# define tls_sll_pop(cls, out) \
|
||||||
|
tls_sll_pop_impl((cls), (out), __func__)
|
||||||
|
#else
|
||||||
|
# define tls_sll_push(cls, ptr, cap) \
|
||||||
|
tls_sll_push_impl((cls), (ptr), (cap), NULL)
|
||||||
|
# define tls_sll_pop(cls, out) \
|
||||||
|
tls_sll_pop_impl((cls), (out), NULL)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // TLS_SLL_BOX_H
|
#endif // TLS_SLL_BOX_H
|
||||||
|
|||||||
Reference in New Issue
Block a user