Add callsite tracking for tls_sll_push/pop (macro-based Box Theory)

Problem:
- [TLS_SLL_PUSH_DUP] fires at 225K iterations, but the existing log could not identify the bypass path
- Need push AND pop callsites to diagnose reuse-before-pop bug

Implementation (Box Theory):
- Renamed tls_sll_push → tls_sll_push_impl (with where parameter)
- Renamed tls_sll_pop → tls_sll_pop_impl (with where parameter)
- Added macro wrappers with __func__ auto-insertion
- Zero changes to 40+ call sites (Box boundary preserved)

Debug-only tracking:
- All tracking code wrapped in #if !HAKMEM_BUILD_RELEASE
- Release builds: where=NULL, zero overhead
- Arrays: s_tls_sll_last_push_from[], s_tls_sll_last_pop_from[]

New log format (emitted as a single line; wrapped and abbreviated here for readability):
[TLS_SLL_PUSH_DUP] cls=5 ptr=0x...
  last_push_from=hak_tiny_free_fast_v2
  last_pop_from=(null)  ← SMOKING GUN!
  where=hak_tiny_free_fast_v2

Decisive Evidence:
- last_pop_from=(null) shows that no tls_sll_pop() had succeeded for this class on this thread
- Unified Cache bypasses TLS SLL (confirmed by Task agent)
- Root cause: unified_cache_refill() carves directly from SuperSlab

Impact:
- Complete push/pop flow tracking (debug builds only)
- Root cause identified: Unified Cache at Line 289
- Next step: Fix unified_cache_refill() to check TLS SLL first

Credit: Box Theory macro pattern suggested by ChatGPT

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-22 11:30:46 +09:00
parent c8842360ca
commit 5c85675621

View File

@ -39,6 +39,12 @@
// Per-thread debug shadow: last successful push base per class (release-safe) // Per-thread debug shadow: last successful push base per class (release-safe)
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0}; static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
// Per-thread callsite tracking (debug-only): for each class, the `where` string
// recorded by tls_sll_push_impl() / tls_sll_pop_impl() on their final success
// path. An entry stays NULL until the first such push/pop of that class on this
// thread. Both arrays are printed by the [TLS_SLL_PUSH_DUP] diagnostic.
#if !HAKMEM_BUILD_RELEASE
static __thread const char* s_tls_sll_last_push_from[TINY_NUM_CLASSES] = {NULL};
static __thread const char* s_tls_sll_last_pop_from[TINY_NUM_CLASSES] = {NULL};
#endif
// Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c) // Phase 3d-B: Unified TLS SLL (defined in hakmem_tiny.c)
extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES]; extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
extern __thread uint64_t g_tls_canary_before_sll; extern __thread uint64_t g_tls_canary_before_sll;
@ -221,8 +227,11 @@ static inline void tls_sll_diag_next(int class_idx, void* base, void* next, cons
// //
// Push BASE pointer into TLS SLL for given class. // Push BASE pointer into TLS SLL for given class.
// Returns true on success, false if capacity full or input invalid. // Returns true on success, false if capacity full or input invalid.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_push() macro instead of calling directly.
static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity, const char* where)
{ {
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push"); HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
@ -356,14 +365,17 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
while (scan && scanned < limit) { while (scan && scanned < limit) {
if (scan == ptr) { if (scan == ptr) {
fprintf(stderr, fprintf(stderr,
"[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_writer=%s\n", "[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
class_idx, class_idx,
ptr, ptr,
g_tls_sll[class_idx].head, g_tls_sll[class_idx].head,
g_tls_sll[class_idx].count, g_tls_sll[class_idx].count,
scanned, scanned,
s_tls_sll_last_push[class_idx], s_tls_sll_last_push[class_idx],
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)"); s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
where ? where : "(null)");
ptr_trace_dump_now("tls_sll_dup"); ptr_trace_dump_now("tls_sll_dup");
// Treat as already free; do not push again. // Treat as already free; do not push again.
return true; return true;
@ -383,6 +395,13 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
g_tls_sll[class_idx].count = cur + 1; g_tls_sll[class_idx].count = cur + 1;
s_tls_sll_last_push[class_idx] = ptr; s_tls_sll_last_push[class_idx] = ptr;
#if !HAKMEM_BUILD_RELEASE
// Record callsite for debugging (debug-only)
s_tls_sll_last_push_from[class_idx] = where;
#else
(void)where; // Suppress unused warning in release
#endif
return true; return true;
} }
@ -390,8 +409,11 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity)
// //
// Pop BASE pointer from TLS SLL. // Pop BASE pointer from TLS SLL.
// Returns true on success and stores BASE into *out. // Returns true on success and stores BASE into *out.
//
// Implementation function with callsite tracking (where).
// Use tls_sll_pop() macro instead of calling directly.
static inline bool tls_sll_pop(int class_idx, void** out) static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where)
{ {
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop"); HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
// Class mask gate: if disallowed, behave as empty // Class mask gate: if disallowed, behave as empty
@ -552,6 +574,13 @@ static inline bool tls_sll_pop(int class_idx, void** out)
// Clear next inside popped node to avoid stale-chain issues. // Clear next inside popped node to avoid stale-chain issues.
tiny_next_write(class_idx, base, NULL); tiny_next_write(class_idx, base, NULL);
#if !HAKMEM_BUILD_RELEASE
// Record callsite for debugging (debug-only)
s_tls_sll_last_pop_from[class_idx] = where;
#else
(void)where; // Suppress unused warning in release
#endif
*out = base; *out = base;
return true; return true;
} }
@ -638,4 +667,21 @@ static inline uint32_t tls_sll_splice(int class_idx,
return moved; return moved;
} }
// ========== Macro Wrappers ==========
//
// Box Theory: call sites keep using tls_sll_push()/tls_sll_pop(). The macros
// forward to the *_impl functions and supply the `where` argument themselves,
// so existing call sites need no changes.
//
// TLS_SLL_CALLSITE_ is the enclosing function's name (__func__) in debug
// builds and NULL when HAKMEM_BUILD_RELEASE is set.
#if HAKMEM_BUILD_RELEASE
#  define TLS_SLL_CALLSITE_ NULL
#else
#  define TLS_SLL_CALLSITE_ __func__
#endif

#define tls_sll_push(cls, ptr, cap) tls_sll_push_impl((cls), (ptr), (cap), TLS_SLL_CALLSITE_)
#define tls_sll_pop(cls, out)       tls_sll_pop_impl((cls), (out), TLS_SLL_CALLSITE_)
#endif // TLS_SLL_BOX_H #endif // TLS_SLL_BOX_H