Larson double-free investigation: Add full operation lifecycle logging

**Diagnostic Enhancement**: Complete malloc/free/pop operation tracing for debug

**Problem**: Larson crashes with TLS_SLL_DUP at count=18, need to trace exact
pointer lifecycle to identify if allocator returns duplicate addresses or if
benchmark has double-free bug.

**Implementation** (ChatGPT + Claude + Task collaboration):

1. **Global Operation Counter** (core/hakmem_tiny_config_box.inc:9):
   - Single atomic counter for all operations (malloc/free/pop)
   - Chronological ordering across all paths

2. **Allocation Logging** (core/hakmem_tiny_config_box.inc:148-161):
   - HAK_RET_ALLOC macro enhanced with operation logging
   - Logs first 50 class=1 allocations with ptr/base/tls_count

3. **Free Logging** (core/tiny_free_fast_v2.inc.h:222-235):
   - Added before tls_sll_push() call (line 221)
   - Logs first 50 class=1 frees with ptr/base/tls_count_before

4. **Pop Logging** (core/box/tls_sll_box.h:587-597):
   - Added in tls_sll_pop_impl() after successful pop
   - Logs first 50 class=1 pops with base/tls_count_after

5. **Drain Debug Logging** (core/box/tls_sll_drain_box.h:143-151):
   - Enhanced drain loop with detailed logging
   - Tracks pop failures and drained block counts

**Initial Findings**:
- First 19 operations: ALL frees, ZERO allocations, ZERO pops
- OP#0006: First free of 0x...430
- OP#0018: Duplicate free of 0x...430 → TLS_SLL_DUP detected
- Suggests either: (a) allocations before logging starts, or (b) Larson bug

**Debug-only**: All logging gated by !HAKMEM_BUILD_RELEASE (zero cost in release)

**Next Steps**:
- Expand logging window to 200 operations
- Log initialization phase allocations
- Cross-check with Larson benchmark source

**Status**: Ready for extended testing
This commit is contained in:
Moe Charm (CI)
2025-11-27 08:18:01 +09:00
parent 8553894171
commit 8af9123bcc
4 changed files with 56 additions and 2 deletions

View File

@ -583,6 +583,18 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
#if !HAKMEM_BUILD_RELEASE #if !HAKMEM_BUILD_RELEASE
// Record callsite for debugging (debug-only) // Record callsite for debugging (debug-only)
s_tls_sll_last_pop_from[class_idx] = where; s_tls_sll_last_pop_from[class_idx] = where;
// Debug: Log pop operations (first 50, class 1 only)
{
extern _Atomic uint64_t g_debug_op_count;
uint64_t op = atomic_load(&g_debug_op_count);
if (op < 50 && class_idx == 1) {
fprintf(stderr, "[OP#%04lu POP] cls=%d base=%p tls_count_after=%u\n",
(unsigned long)op, class_idx, base,
g_tls_sll[class_idx].count);
fflush(stderr);
}
}
#else #else
(void)where; // Suppress unused warning in release (void)where; // Suppress unused warning in release
#endif #endif

View File

@ -140,9 +140,16 @@ static inline uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size) {
void* base = NULL; void* base = NULL;
if (!tls_sll_pop(class_idx, &base)) { if (!tls_sll_pop(class_idx, &base)) {
// TLS SLL exhausted (concurrent drain or count mismatch) // TLS SLL exhausted (concurrent drain or count mismatch)
if (g_debug) {
fprintf(stderr, "[TLS_SLL_DRAIN] Pop failed at i=%u/%u (TLS SLL exhausted)\n", i, to_drain);
}
break; break;
} }
if (g_debug && i < 5) {
fprintf(stderr, "[TLS_SLL_DRAIN] Popped %u/%u: class=%d base=%p\n", i+1, to_drain, class_idx, base);
}
// Resolve SuperSlab/Slab (like slow path does) // Resolve SuperSlab/Slab (like slow path does)
SuperSlab* ss = hak_super_lookup(base); SuperSlab* ss = hak_super_lookup(base);
if (!ss || ss->magic != SUPERSLAB_MAGIC) { if (!ss || ss->magic != SUPERSLAB_MAGIC) {

View File

@ -2,6 +2,12 @@
// Box: Tiny allocator configuration, debug counters, and return helpers. // Box: Tiny allocator configuration, debug counters, and return helpers.
// Extracted from hakmem_tiny.c to reduce file size and isolate config logic. // Extracted from hakmem_tiny.c to reduce file size and isolate config logic.
// ============================================================================
// Global Operation Counter (for debug logging)
// ============================================================================
#include <stdatomic.h>
_Atomic uint64_t g_debug_op_count = 0;
// ============================================================================ // ============================================================================
// Size class table (Box 3 dependency) // Size class table (Box 3 dependency)
// ============================================================================ // ============================================================================
@ -138,8 +144,21 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);
return (void*)((uint8_t*)(base_ptr) + 1); \ return (void*)((uint8_t*)(base_ptr) + 1); \
} while(0) } while(0)
#else #else
// Debug: Keep full validation via tiny_region_id_write_header() // Debug: Keep full validation via tiny_region_id_write_header() + operation logging
#define HAK_RET_ALLOC(cls, ptr) return tiny_region_id_write_header((ptr), (cls)) #define HAK_RET_ALLOC(cls, ptr) do { \
extern _Atomic uint64_t g_debug_op_count; \
extern __thread TinyTLSSLL g_tls_sll[]; \
void* base_ptr = (ptr); \
void* user_ptr = tiny_region_id_write_header(base_ptr, (cls)); \
uint64_t op = atomic_fetch_add(&g_debug_op_count, 1); \
if (op < 50 && (cls) == 1) { \
fprintf(stderr, "[OP#%04lu ALLOC] cls=%d ptr=%p base=%p from=alloc tls_count=%u\n", \
(unsigned long)op, (cls), user_ptr, base_ptr, \
g_tls_sll[(cls)].count); \
fflush(stderr); \
} \
return user_ptr; \
} while(0)
#endif #endif
#else #else
// Legacy: Stats and routing before return // Legacy: Stats and routing before return

View File

@ -218,6 +218,22 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis) // REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis)
// Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs // Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs
#if !HAKMEM_BUILD_RELEASE
// Debug: Log free operations (first 50, class 1 only)
{
extern _Atomic uint64_t g_debug_op_count;
extern __thread TinyTLSSLL g_tls_sll[];
uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
if (op < 50 && class_idx == 1) {
fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p tls_count_before=%u\n",
(unsigned long)op, class_idx, ptr, base,
g_tls_sll[class_idx].count);
fflush(stderr);
}
}
#endif
if (!tls_sll_push(class_idx, base, UINT32_MAX)) { if (!tls_sll_push(class_idx, base, UINT32_MAX)) {
// C7 rejected or capacity exceeded - route to slow path // C7 rejected or capacity exceeded - route to slow path
return 0; return 0;