From 8af9123bccd2aa7e80f732ea24582d114c3a4b86 Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Thu, 27 Nov 2025 08:18:01 +0900 Subject: [PATCH] Larson double-free investigation: Add full operation lifecycle logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Diagnostic Enhancement**: Complete malloc/free/pop operation tracing for debug **Problem**: Larson crashes with TLS_SLL_DUP at count=18, need to trace exact pointer lifecycle to identify if allocator returns duplicate addresses or if benchmark has double-free bug. **Implementation** (ChatGPT + Claude + Task collaboration): 1. **Global Operation Counter** (core/hakmem_tiny_config_box.inc:9): - Single atomic counter for all operations (malloc/free/pop) - Chronological ordering across all paths 2. **Allocation Logging** (core/hakmem_tiny_config_box.inc:148-161): - HAK_RET_ALLOC macro enhanced with operation logging - Logs first 50 class=1 allocations with ptr/base/tls_count 3. **Free Logging** (core/tiny_free_fast_v2.inc.h:222-235): - Added before tls_sll_push() call (line 221) - Logs first 50 class=1 frees with ptr/base/tls_count_before 4. **Pop Logging** (core/box/tls_sll_box.h:587-597): - Added in tls_sll_pop_impl() after successful pop - Logs first 50 class=1 pops with base/tls_count_after 5. **Drain Debug Logging** (core/box/tls_sll_drain_box.h:143-151): - Enhanced drain loop with detailed logging - Tracks pop failures and drained block counts **Initial Findings**: - First 19 operations: ALL frees, ZERO allocations, ZERO pops - OP#0006: First free of 0x...430 - OP#0018: Duplicate free of 0x...430 → TLS_SLL_DUP detected - Suggests either: (a) allocations before logging starts, or (b) Larson bug **Debug-only**: All logging gated by !HAKMEM_BUILD_RELEASE (zero cost in release) **Next Steps**: - Expand logging window to 200 operations - Log initialization phase allocations - Cross-check with Larson benchmark source **Status**: Ready for extended testing --- core/box/tls_sll_box.h | 12 ++++++++++++ core/box/tls_sll_drain_box.h | 7 +++++++ core/hakmem_tiny_config_box.inc | 23 +++++++++++++++++++++-- core/tiny_free_fast_v2.inc.h | 16 ++++++++++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/core/box/tls_sll_box.h b/core/box/tls_sll_box.h index cadf2c5e..bd0aa09c 100644 --- a/core/box/tls_sll_box.h +++ b/core/box/tls_sll_box.h @@ -583,6 +583,18 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where #if !HAKMEM_BUILD_RELEASE // Record callsite for debugging (debug-only) s_tls_sll_last_pop_from[class_idx] = where; + + // Debug: Log pop operations (first 50, class 1 only) + { + extern _Atomic uint64_t g_debug_op_count; + uint64_t op = atomic_load(&g_debug_op_count); + if (op < 50 && class_idx == 1) { + fprintf(stderr, "[OP#%04lu POP] cls=%d base=%p tls_count_after=%u\n", + (unsigned long)op, class_idx, base, + g_tls_sll[class_idx].count); + fflush(stderr); + } + } #else (void)where; // Suppress unused warning in release #endif diff --git a/core/box/tls_sll_drain_box.h b/core/box/tls_sll_drain_box.h index 8a1b841e..6b372db6 100644 --- a/core/box/tls_sll_drain_box.h +++ b/core/box/tls_sll_drain_box.h @@ -140,9 +140,16 @@ static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) { void* base = NULL; if (!tls_sll_pop(class_idx, &base)) { // TLS SLL exhausted (concurrent drain or count mismatch) + if (g_debug) { + fprintf(stderr, "[TLS_SLL_DRAIN] Pop failed at i=%u/%u (TLS SLL exhausted)\n", i, to_drain); + } break; } + if (g_debug && i < 5) { + fprintf(stderr, "[TLS_SLL_DRAIN] Popped %u/%u: class=%d base=%p\n", i+1, to_drain, class_idx, base); + } + // Resolve SuperSlab/Slab (like slow path does) SuperSlab* ss = hak_super_lookup(base); if (!ss || ss->magic != SUPERSLAB_MAGIC) { diff --git a/core/hakmem_tiny_config_box.inc b/core/hakmem_tiny_config_box.inc index 7cfed33c..9cb8122e 100644 --- a/core/hakmem_tiny_config_box.inc +++ b/core/hakmem_tiny_config_box.inc @@ -2,6 +2,12 @@ // Box: Tiny allocator configuration, debug counters, and return helpers. // Extracted from hakmem_tiny.c to reduce file size and isolate config logic. +// ============================================================================ +// Global Operation Counter (for debug logging) +// ============================================================================ +#include +_Atomic uint64_t g_debug_op_count = 0; + // ============================================================================ // Size class table (Box 3 dependency) // ============================================================================ @@ -138,8 +144,21 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr); return (void*)((uint8_t*)(base_ptr) + 1); \ } while(0) #else - // Debug: Keep full validation via tiny_region_id_write_header() - #define HAK_RET_ALLOC(cls, ptr) return tiny_region_id_write_header((ptr), (cls)) + // Debug: Keep full validation via tiny_region_id_write_header() + operation logging + #define HAK_RET_ALLOC(cls, ptr) do { \ + extern _Atomic uint64_t g_debug_op_count; \ + extern __thread TinyTLSSLL g_tls_sll[]; \ + void* base_ptr = (ptr); \ + void* user_ptr = tiny_region_id_write_header(base_ptr, (cls)); \ + uint64_t op = atomic_fetch_add(&g_debug_op_count, 1); \ + if (op < 50 && (cls) == 1) { \ + fprintf(stderr, "[OP#%04lu ALLOC] cls=%d ptr=%p base=%p from=alloc tls_count=%u\n", \ + (unsigned long)op, (cls), user_ptr, base_ptr, \ + g_tls_sll[(cls)].count); \ + fflush(stderr); \ + } \ + return user_ptr; \ + } while(0) #endif #else // Legacy: Stats and routing before return diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h index d1d279da..7513fbcd 100644 --- a/core/tiny_free_fast_v2.inc.h +++ b/core/tiny_free_fast_v2.inc.h @@ -218,6 +218,22 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis) // Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs + +#if !HAKMEM_BUILD_RELEASE + // Debug: Log free operations (first 50, class 1 only) + { + extern _Atomic uint64_t g_debug_op_count; + extern __thread TinyTLSSLL g_tls_sll[]; + uint64_t op = atomic_fetch_add(&g_debug_op_count, 1); + if (op < 50 && class_idx == 1) { + fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p tls_count_before=%u\n", + (unsigned long)op, class_idx, ptr, base, + g_tls_sll[class_idx].count); + fflush(stderr); + } + } +#endif + if (!tls_sll_push(class_idx, base, UINT32_MAX)) { // C7 rejected or capacity exceeded - route to slow path return 0;