Fix #16: Resolve double BASE→USER conversion causing header corruption

🎯 ROOT CAUSE: Internal allocation helpers were prematurely converting
BASE → USER pointers before returning to the caller. The caller then applied
HAK_RET_ALLOC/tiny_region_id_write_header, which performed ANOTHER BASE→USER
conversion, resulting in a double offset (BASE+2) and the header being
written at the wrong location.

📦 BOX THEORY SOLUTION: Establish clean pointer conversion boundary at
tiny_region_id_write_header, making it the single source of truth for
BASE → USER conversion.

🔧 CHANGES:
- Fix #16: Remove premature BASE→USER conversions (6 locations)
  * core/tiny_alloc_fast.inc.h (3 fixes)
  * core/hakmem_tiny_refill.inc.h (2 fixes)
  * core/hakmem_tiny_fastcache.inc.h (1 fix)

- Fix #12: Add header validation in tls_sll_pop (detect corruption)
- Fix #14: Defense-in-depth header restoration in tls_sll_splice
- Fix #15: USER pointer detection (for debugging)
- Fix #13: Bump window header restoration
- Fix #2, #6, #7, #8: Various header restoration & NULL termination

🧪 TEST RESULTS: 100% SUCCESS
- 10K-500K iterations: All passed
- 8 seeds × 100K: All passed (seeds: 42, 123, 456, 789, 999, 314, 271, 161)
- Performance: ~630K ops/s average (stable)
- Header corruption: ZERO

📋 FIXES SUMMARY:
Fix #1-8:   Initial header restoration & chain fixes (chatgpt-san)
Fix #9-10:  USER pointer auto-fix (later disabled)
Fix #11:    Header restoration before tls_sll_push (#11B covers the SLL refill paths)
Fix #12:    Validation system (caught corruption at call 14209)
Fix #13:    Bump window header writes
Fix #14:    Splice defense-in-depth
Fix #15:    USER pointer detection (debugging tool)
Fix #16:    Double conversion fix (FINAL SOLUTION) 

🎓 LESSONS LEARNED:
1. Validation catches bugs early (Fix #12 was critical)
2. Class-specific inline logging reveals patterns (Option C)
3. Box Theory provides clean architectural boundaries
4. Combining multiple investigation approaches pays off (Task/chatgpt-san collaboration)

📄 DOCUMENTATION:
- P0_BUG_STATUS.md: Complete bug tracking timeline
- C2_CORRUPTION_ROOT_CAUSE_FINAL.md: Detailed root cause analysis
- FINAL_ANALYSIS_C2_CORRUPTION.md: Investigation methodology

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: Task Agent <task@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
This commit is contained in:
Moe Charm (CI)
2025-11-12 10:33:57 +09:00
parent af589c7169
commit 84dbd97fe9
13 changed files with 1270 additions and 72 deletions

View File

@ -208,14 +208,14 @@ static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) {
else {
// Push failed, return remaining to TLS (preserve order)
tls_list_bulk_put(tls, node, batch_tail, remaining, class_idx);
// CRITICAL FIX: Convert base -> user pointer before returning
void* user_ptr = (class_idx == 7) ? ret : (void*)((uint8_t*)ret + 1);
return user_ptr;
// ✅ FIX #16: Return BASE pointer (not USER)
// Caller will apply HAK_RET_ALLOC which does BASE → USER conversion
return ret;
}
}
// CRITICAL FIX: Convert base -> user pointer before returning
void* user_ptr = (class_idx == 7) ? ret : (void*)((uint8_t*)ret + 1);
return user_ptr;
// ✅ FIX #16: Return BASE pointer (not USER)
// Caller will apply HAK_RET_ALLOC which does BASE → USER conversion
return ret;
}
// Quick slot refill from SLL
@ -352,6 +352,17 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
void* p = tiny_block_at_index(base, meta->carved, bs);
meta->carved++;
meta->used++;
// ✅ FIX #11B: Restore header BEFORE tls_sll_push
// ROOT CAUSE: Simple refill path carves blocks but doesn't write headers.
// tls_sll_push() expects headers at base for C0-C6 to write next at base+1.
// Without header, base+1 contains garbage → chain corruption → SEGV!
#if HAKMEM_TINY_HEADER_CLASSIDX
if (class_idx != 7) {
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
}
#endif
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
if (!tls_sll_push(class_idx, p, sll_cap)) {
// SLL full (should not happen, room was checked)
@ -367,6 +378,16 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
void* p = meta->freelist;
meta->freelist = *(void**)p;
meta->used++;
// ✅ FIX #11B: Restore header BEFORE tls_sll_push (same as Fix #11 for freelist)
// Freelist stores next at base (offset 0), overwriting header.
// Must restore header so tls_sll_push can write next at base+1 correctly.
#if HAKMEM_TINY_HEADER_CLASSIDX
if (class_idx != 7) {
*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
}
#endif
// CRITICAL: Use Box TLS-SLL API (C7-safe, no race)
if (!tls_sll_push(class_idx, p, sll_cap)) {
// SLL full (should not happen, room was checked)
@ -443,14 +464,29 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
uint8_t* cur = g_tls_bcur[class_idx];
if (__builtin_expect(cur != NULL, 0)) {
uint8_t* end = g_tls_bend[class_idx];
// ✅ FIX #13B: Use stride (not user size) to match window arming (line 516)
// ROOT CAUSE: Window is carved with stride spacing, but fast path advanced by user size,
// causing misalignment and missing headers on blocks after the first one.
size_t bs = g_tiny_class_sizes[class_idx];
#if HAKMEM_TINY_HEADER_CLASSIDX
if (class_idx != 7) bs += 1; // stride = user_size + header
#endif
if (__builtin_expect(cur <= end - bs, 1)) {
g_tls_bcur[class_idx] = cur + bs;
#if HAKMEM_DEBUG_COUNTERS
g_bump_hits[class_idx]++;
#endif
HAK_TP1(bump_hit, class_idx);
return (void*)cur;
// ✅ FIX #13: Write header and return BASE pointer
// ROOT CAUSE: Bump allocations didn't write headers, causing corruption when freed.
// SOLUTION: Write header to carved block before returning BASE.
// IMPORTANT: Return BASE (not USER) - caller will convert via HAK_RET_ALLOC.
#if HAKMEM_TINY_HEADER_CLASSIDX
if (class_idx != 7) {
*cur = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
}
#endif
return (void*)cur; // Return BASE (caller converts to USER via HAK_RET_ALLOC)
}
// Window exhausted
g_tls_bcur[class_idx] = NULL;
@ -484,7 +520,13 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
#endif
g_tls_bcur[class_idx] = start + bs;
g_tls_bend[class_idx] = start + (size_t)chunk * bs;
return (void*)start;
// ✅ FIX #13: Write header and return BASE pointer
#if HAKMEM_TINY_HEADER_CLASSIDX
if (class_idx != 7) {
*start = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
}
#endif
return (void*)start; // Return BASE (caller converts to USER via HAK_RET_ALLOC)
}
// Frontend: refill FastCache directly from TLS active slab (owner-only) or adopt a slab