Enhance TLS SLL diagnostic logging to detect head corruption source
Critical discovery: the TLS SLL head itself is getting corrupted with invalid pointers; this is not a next-pointer offset issue. Added defensive sanitization and detailed logging. Changes: 1. tls_sll_sanitize_head() - New defensive function - Validates TLS head against SuperSlab metadata - Checks header magic byte consistency - Resets corrupted list immediately on detection - Called on entry to push and pop (defensive walls; logged as stage "push" and "pop_enter") 2. Enhanced HDR_RESET diagnostics - Dump both next pointers (offset 0 and tiny_next_off()) - Show first 8 bytes of block (raw dump) - Include next_off value and pointer values - Better correlation with SuperSlab metadata Key Findings from Diagnostic Run (/tmp/sh8_short.log): - TLS head becomes an unregistered garbage value at pop_enter - Example: head=0x749fe96c0990 meta_cls=255 idx=-1 ss=(nil) - Sanitize detects and resets the list - SuperSlab registration is SUCCESSFUL (map_count=4) - But head gets corrupted AFTER registration Root Cause Analysis: ✅ NOT a next-pointer offset issue (would be consistent) ❌ TLS head is being OVERWRITTEN by external code - Candidates: TLS variable collision, memset overflow, stray write Corruption Pattern: 1. SuperSlab initialized successfully (verified by map_count) 2. TLS head is initially correct 3. Between registration and pop_enter: head gets corrupted 4. Corruption value is garbage (unregistered pointer) 5. Lower bytes damaged (0xe1/0x31 patterns) Next Steps: - Check TLS layout and variable boundaries (stack overflow?) - Audit all writes to g_tls_sll array - Look for memset/memcpy operating on wrong range - Consider thread-local storage fragmentation Technical Impact: - Sanitize prevents the corrupted list from propagating (defensive) - But the underlying corruption source remains - May be in TLS initialization, variable layout, or external overwrite Performance: Negligible (sanitize runs once per push and once per pop) 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -176,6 +176,50 @@ static inline int tls_sll_head_valid(hak_base_ptr_t head)
|
|||||||
return (a >= 4096 && a <= 0x00007fffffffffffULL);
|
return (a >= 4096 && a <= 0x00007fffffffffffULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Defensive: validate current TLS head before using it.
|
||||||
|
// If invalid, drop the list to avoid propagating corruption.
|
||||||
|
static inline void tls_sll_sanitize_head(int class_idx, const char* stage)
|
||||||
|
{
|
||||||
|
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
hak_base_ptr_t head = g_tls_sll[class_idx].head;
|
||||||
|
if (hak_base_is_null(head)) return;
|
||||||
|
|
||||||
|
void* raw = HAK_BASE_TO_RAW(head);
|
||||||
|
SuperSlab* ss = hak_super_lookup(raw);
|
||||||
|
int cap = ss ? ss_slabs_capacity(ss) : 0;
|
||||||
|
int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, raw) : -1;
|
||||||
|
uint8_t meta_cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
|
||||||
|
|
||||||
|
int reset = 0;
|
||||||
|
if (!ss || ss->magic != SUPERSLAB_MAGIC || idx < 0 || idx >= cap || meta_cls != (uint8_t)class_idx) {
|
||||||
|
reset = 1;
|
||||||
|
}
|
||||||
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||||
|
if (!reset) {
|
||||||
|
uint8_t hdr = *(uint8_t*)raw;
|
||||||
|
uint8_t expect = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
|
||||||
|
if (hdr != expect) {
|
||||||
|
reset = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (reset) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"[TLS_SLL_SANITIZE] stage=%s cls=%d head=%p meta_cls=%u idx=%d ss=%p\n",
|
||||||
|
stage ? stage : "(null)",
|
||||||
|
class_idx,
|
||||||
|
raw,
|
||||||
|
(unsigned)meta_cls,
|
||||||
|
idx,
|
||||||
|
(void*)ss);
|
||||||
|
g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
|
||||||
|
g_tls_sll[class_idx].count = 0;
|
||||||
|
tls_sll_record_writer(class_idx, "sanitize");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage)
|
static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage)
|
||||||
{
|
{
|
||||||
static _Atomic uint32_t g_hdr_mismatch_log = 0;
|
static _Atomic uint32_t g_hdr_mismatch_log = 0;
|
||||||
@ -262,6 +306,9 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Defensive: ensure current head is sane before linking new node.
|
||||||
|
tls_sll_sanitize_head(class_idx, "push");
|
||||||
|
|
||||||
// Capacity semantics:
|
// Capacity semantics:
|
||||||
// - capacity == 0 → disabled (reject)
|
// - capacity == 0 → disabled (reject)
|
||||||
// - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
|
// - capacity > 1<<20 → treat as "unbounded" sentinel (no limit)
|
||||||
@ -529,6 +576,9 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
|
|||||||
}
|
}
|
||||||
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
|
||||||
|
|
||||||
|
// Defensive: ensure current head is sane before accessing it.
|
||||||
|
tls_sll_sanitize_head(class_idx, "pop_enter");
|
||||||
|
|
||||||
hak_base_ptr_t base = g_tls_sll[class_idx].head;
|
hak_base_ptr_t base = g_tls_sll[class_idx].head;
|
||||||
if (hak_base_is_null(base)) {
|
if (hak_base_is_null(base)) {
|
||||||
return false;
|
return false;
|
||||||
@ -628,6 +678,15 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
|
|||||||
static _Atomic uint32_t g_hdr_reset_diag = 0;
|
static _Atomic uint32_t g_hdr_reset_diag = 0;
|
||||||
uint32_t shot = atomic_fetch_add_explicit(&g_hdr_reset_diag, 1, memory_order_relaxed);
|
uint32_t shot = atomic_fetch_add_explicit(&g_hdr_reset_diag, 1, memory_order_relaxed);
|
||||||
if (shot < 8) {
|
if (shot < 8) {
|
||||||
|
// Extra diagnostics: dump raw next pointers at offsets 0 and tiny_next_off()
|
||||||
|
uintptr_t next_raw_off0 = 0;
|
||||||
|
uintptr_t next_raw_off1 = 0;
|
||||||
|
size_t next_off = tiny_next_off(class_idx);
|
||||||
|
memcpy(&next_raw_off0, raw_base, sizeof(next_raw_off0));
|
||||||
|
memcpy(&next_raw_off1, (uint8_t*)raw_base + next_off, sizeof(next_raw_off1));
|
||||||
|
uint8_t dump8[8] = {0};
|
||||||
|
memcpy(dump8, raw_base, sizeof(dump8));
|
||||||
|
|
||||||
SuperSlab* ss_diag = hak_super_lookup(raw_base);
|
SuperSlab* ss_diag = hak_super_lookup(raw_base);
|
||||||
int slab_idx = ss_diag ? slab_index_for(ss_diag, raw_base) : -1;
|
int slab_idx = ss_diag ? slab_index_for(ss_diag, raw_base) : -1;
|
||||||
uint8_t meta_cls = 0xff;
|
uint8_t meta_cls = 0xff;
|
||||||
@ -638,7 +697,8 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
|
|||||||
PTR_NEXT_READ("tls_hdr_reset_diag", class_idx, raw_base, 0, raw_next_diag);
|
PTR_NEXT_READ("tls_hdr_reset_diag", class_idx, raw_base, 0, raw_next_diag);
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"[TLS_SLL_HDR_RESET] shot=%u cls=%d base=%p got=0x%02x expect=0x%02x "
|
"[TLS_SLL_HDR_RESET] shot=%u cls=%d base=%p got=0x%02x expect=0x%02x "
|
||||||
"next=%p meta_cls=%u slab_idx=%d last_writer=%s last_push=%p count=%llu\n",
|
"next=%p meta_cls=%u slab_idx=%d last_writer=%s last_push=%p count=%llu "
|
||||||
|
"next_off=%zu next_raw0=%p next_raw1=%p bytes=%02x%02x%02x%02x%02x%02x%02x%02x\n",
|
||||||
shot + 1,
|
shot + 1,
|
||||||
class_idx,
|
class_idx,
|
||||||
raw_base,
|
raw_base,
|
||||||
@ -649,7 +709,12 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
|
|||||||
slab_idx,
|
slab_idx,
|
||||||
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
|
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
|
||||||
HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
|
HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
|
||||||
(unsigned long long)cnt);
|
(unsigned long long)cnt,
|
||||||
|
next_off,
|
||||||
|
(void*)next_raw_off0,
|
||||||
|
(void*)next_raw_off1,
|
||||||
|
dump8[0], dump8[1], dump8[2], dump8[3],
|
||||||
|
dump8[4], dump8[5], dump8[6], dump8[7]);
|
||||||
} else if (cnt % 10000 == 0) {
|
} else if (cnt % 10000 == 0) {
|
||||||
fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
|
fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
|
||||||
class_idx, raw_base, got, expect, (unsigned long long)cnt);
|
class_idx, raw_base, got, expect, (unsigned long long)cnt);
|
||||||
|
|||||||
Reference in New Issue
Block a user