Enhance TLS SLL diagnostic logging to detect head corruption source

Critical discovery: the TLS SLL head itself is being overwritten with invalid
pointers; this is not a next-pointer offset issue. Added defensive sanitization
and detailed logging.

Changes:
1. tls_sll_sanitize_head() - New defensive function
   - Validates TLS head against SuperSlab metadata
   - Checks header magic byte consistency
   - Resets corrupted list immediately on detection
   - Called on entry to push (stage "push") and on entry to pop (stage "pop_enter") as defensive walls

2. Enhanced HDR_RESET diagnostics
   - Dump both next pointers (offset 0 and tiny_next_off())
   - Show first 8 bytes of block (raw dump)
   - Include next_off value and pointer values
   - Better correlation with SuperSlab metadata

Key Findings from Diagnostic Run (/tmp/sh8_short.log):
- TLS head becomes unregistered garbage value at pop_enter
- Example: head=0x749fe96c0990 meta_cls=255 idx=-1 ss=(nil)
- Sanitize detects and resets the list
- SuperSlab registration is SUCCESSFUL (map_count=4)
- But head gets corrupted AFTER registration

Root Cause Analysis:
- NOT a next-pointer offset issue (an offset bug would produce a consistent corruption pattern)
- The TLS head is being OVERWRITTEN by external code
   - Candidates: TLS variable collision, memset overflow, stray write

Corruption Pattern:
1. Superslab initialized successfully (verified by map_count)
2. TLS head is initially correct
3. Between registration and pop_enter: head gets corrupted
4. Corruption value is garbage (unregistered pointer)
5. Lower bytes damaged (0xe1/0x31 patterns)

Next Steps:
- Check TLS layout and variable boundaries (stack overflow?)
- Audit all writes to g_tls_sll array
- Look for memset/memcpy operating on wrong range
- Consider thread-local storage fragmentation

Technical Impact:
- Sanitize stops a corrupted list from propagating further (defensive only)
- But underlying corruption source remains
- May be in TLS initialization, variable layout, or external overwrite

Performance: Negligible (sanitize runs once on entry to each push and each pop)

🤖 Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-03 21:01:25 +09:00
parent c6aeca0667
commit 4d2784c52f

View File

@ -176,6 +176,50 @@ static inline int tls_sll_head_valid(hak_base_ptr_t head)
return (a >= 4096 && a <= 0x00007fffffffffffULL); return (a >= 4096 && a <= 0x00007fffffffffffULL);
} }
// Defensive wall: verify that the TLS SLL head for `class_idx` still refers to
// a block this allocator owns before any caller links to it or dereferences it.
//
// The head is accepted only when every check below passes:
//   - hak_super_lookup() returns a SuperSlab whose magic is SUPERSLAB_MAGIC,
//   - slab_index_for() yields an index in [0, ss_slabs_capacity(ss)),
//   - that slab's recorded class_idx equals `class_idx`,
//   - (only when HAKMEM_TINY_HEADER_CLASSIDX is enabled) the first byte of the
//     block equals HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK).
//
// If any check fails, one [TLS_SLL_SANITIZE] line is written to stderr, the
// whole list for this class is dropped (head = NULL, count = 0) so the bad
// pointer cannot propagate, and "sanitize" is recorded as the last writer.
//
// class_idx: tiny size class; values outside [0, TINY_NUM_CLASSES) return
//            immediately without touching any state.
// stage:     caller-supplied label for the log line; NULL prints as "(null)".
static inline void tls_sll_sanitize_head(int class_idx, const char* stage)
{
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        return;
    }

    hak_base_ptr_t head = g_tls_sll[class_idx].head;
    if (hak_base_is_null(head)) {
        return; // Empty list: nothing to validate.
    }

    void* raw = HAK_BASE_TO_RAW(head);
    SuperSlab* ss = hak_super_lookup(raw);

    // Values reported in the log line when the metadata cannot be resolved.
    int idx = -1;
    uint8_t meta_cls = 0xff;
    int head_ok = 0;

    // Validate the magic BEFORE reading anything else out of the SuperSlab:
    // the head is suspected to be garbage, so no other field of `ss` is
    // consulted until the magic has been confirmed.
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int cap = ss_slabs_capacity(ss);
        idx = slab_index_for(ss, raw);
        if (idx >= 0 && idx < cap) {
            meta_cls = ss->slabs[idx].class_idx;
            head_ok = (meta_cls == (uint8_t)class_idx);
        }
    }

#if HAKMEM_TINY_HEADER_CLASSIDX
    // Only read the block's header byte once the metadata checks have passed.
    if (head_ok) {
        uint8_t hdr = *(const uint8_t*)raw;
        uint8_t expect = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
        if (hdr != expect) {
            head_ok = 0;
        }
    }
#endif

    if (head_ok) {
        return;
    }

    fprintf(stderr,
            "[TLS_SLL_SANITIZE] stage=%s cls=%d head=%p meta_cls=%u idx=%d ss=%p\n",
            stage ? stage : "(null)",
            class_idx,
            raw,
            (unsigned)meta_cls,
            idx,
            (void*)ss);

    // Drop the entire list rather than trying to salvage nodes behind a bad head.
    g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
    g_tls_sll[class_idx].count = 0;
    tls_sll_record_writer(class_idx, "sanitize");
}
static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage) static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage)
{ {
static _Atomic uint32_t g_hdr_mismatch_log = 0; static _Atomic uint32_t g_hdr_mismatch_log = 0;
@ -262,6 +306,9 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
return false; return false;
} }
// Defensive: ensure current head is sane before linking new node.
tls_sll_sanitize_head(class_idx, "push");
// Capacity semantics: // Capacity semantics:
// - capacity == 0 → disabled (reject) // - capacity == 0 → disabled (reject)
// - capacity > 1<<20 → treat as "unbounded" sentinel (no limit) // - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
@ -529,6 +576,9 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
} }
atomic_fetch_add(&g_integrity_check_class_bounds, 1); atomic_fetch_add(&g_integrity_check_class_bounds, 1);
// Defensive: ensure current head is sane before accessing it.
tls_sll_sanitize_head(class_idx, "pop_enter");
hak_base_ptr_t base = g_tls_sll[class_idx].head; hak_base_ptr_t base = g_tls_sll[class_idx].head;
if (hak_base_is_null(base)) { if (hak_base_is_null(base)) {
return false; return false;
@ -628,6 +678,15 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
static _Atomic uint32_t g_hdr_reset_diag = 0; static _Atomic uint32_t g_hdr_reset_diag = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_hdr_reset_diag, 1, memory_order_relaxed); uint32_t shot = atomic_fetch_add_explicit(&g_hdr_reset_diag, 1, memory_order_relaxed);
if (shot < 8) { if (shot < 8) {
// Extra diagnostics: dump raw next pointers at offsets 0 and tiny_next_off()
uintptr_t next_raw_off0 = 0;
uintptr_t next_raw_off1 = 0;
size_t next_off = tiny_next_off(class_idx);
memcpy(&next_raw_off0, raw_base, sizeof(next_raw_off0));
memcpy(&next_raw_off1, (uint8_t*)raw_base + next_off, sizeof(next_raw_off1));
uint8_t dump8[8] = {0};
memcpy(dump8, raw_base, sizeof(dump8));
SuperSlab* ss_diag = hak_super_lookup(raw_base); SuperSlab* ss_diag = hak_super_lookup(raw_base);
int slab_idx = ss_diag ? slab_index_for(ss_diag, raw_base) : -1; int slab_idx = ss_diag ? slab_index_for(ss_diag, raw_base) : -1;
uint8_t meta_cls = 0xff; uint8_t meta_cls = 0xff;
@ -638,7 +697,8 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
PTR_NEXT_READ("tls_hdr_reset_diag", class_idx, raw_base, 0, raw_next_diag); PTR_NEXT_READ("tls_hdr_reset_diag", class_idx, raw_base, 0, raw_next_diag);
fprintf(stderr, fprintf(stderr,
"[TLS_SLL_HDR_RESET] shot=%u cls=%d base=%p got=0x%02x expect=0x%02x " "[TLS_SLL_HDR_RESET] shot=%u cls=%d base=%p got=0x%02x expect=0x%02x "
"next=%p meta_cls=%u slab_idx=%d last_writer=%s last_push=%p count=%llu\n", "next=%p meta_cls=%u slab_idx=%d last_writer=%s last_push=%p count=%llu "
"next_off=%zu next_raw0=%p next_raw1=%p bytes=%02x%02x%02x%02x%02x%02x%02x%02x\n",
shot + 1, shot + 1,
class_idx, class_idx,
raw_base, raw_base,
@ -649,7 +709,12 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
slab_idx, slab_idx,
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)", g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]), HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
(unsigned long long)cnt); (unsigned long long)cnt,
next_off,
(void*)next_raw_off0,
(void*)next_raw_off1,
dump8[0], dump8[1], dump8[2], dump8[3],
dump8[4], dump8[5], dump8[6], dump8[7]);
} else if (cnt % 10000 == 0) { } else if (cnt % 10000 == 0) {
fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n", fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
class_idx, raw_base, got, expect, (unsigned long long)cnt); class_idx, raw_base, got, expect, (unsigned long long)cnt);