feat: Add ACE allocation failure tracing and debug hooks

This commit introduces a tracing mechanism for allocation failures within the Adaptive Cache Engine (ACE) component, enabling precise identification of the root cause of Out-Of-Memory (OOM) issues related to ACE allocations.

Key changes include:
- **ACE Tracing Implementation**:
  - Added an  environment variable to enable/disable detailed logging of allocation failures.
  - Instrumented , , and  to distinguish between "Threshold" (size-class mismatch), "Exhaustion" (pool depletion), and "MapFail" (OS memory-allocation failure); see the sketch after this list.
- **Build System Fixes**:
  - Corrected  to ensure  is properly linked into , resolving an  error.
- **LD_PRELOAD Wrapper Adjustments**:
  - Investigated the  wrapper's behavior under , particularly its interaction with  and  checks; see the interposition sketch below.
  - Enabled debugging flags for the  environment to prevent unintended fallbacks to 's  for non-tiny allocations, allowing comprehensive testing of the  allocator.
- **Debugging & Verification**:
  - Introduced temporary verbose logging to pinpoint execution flow issues within  interception and  routing. These temporary logs have been removed.
  - Created  to facilitate testing of the tracing features.
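
As a rough illustration of the failure-classification scheme described above, here is a minimal sketch of an env-gated trace hook. Every identifier in it (`HAKMEM_ACE_TRACE`, `ace_trace_fail`, and so on) is hypothetical, since the actual names are elided in this message; only the three failure categories come from the commit itself.

```c
/* Minimal sketch only: all identifiers are illustrative, not the commit's
 * actual names. Classifies an allocation failure and logs it when an
 * environment switch is set, mirroring the lazy getenv-caching pattern
 * used elsewhere in this codebase. */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

typedef enum {
    ACE_FAIL_THRESHOLD,   /* size-class mismatch */
    ACE_FAIL_EXHAUSTION,  /* pool depletion */
    ACE_FAIL_MAPFAIL      /* OS memory allocation (e.g. mmap) failure */
} ace_fail_kind;

static int ace_trace_enabled(void)
{
    static int cached = -1;
    if (cached < 0) {
        const char* e = getenv("HAKMEM_ACE_TRACE"); /* hypothetical name */
        cached = (e && *e && *e != '0') ? 1 : 0;
    }
    return cached;
}

static void ace_trace_fail(ace_fail_kind kind, size_t size, int class_idx)
{
    if (!ace_trace_enabled()) return;
    static const char* const k_names[] = { "Threshold", "Exhaustion", "MapFail" };
    fprintf(stderr, "[ACE_ALLOC_FAIL] kind=%s size=%zu cls=%d\n",
            k_names[kind], size, class_idx);
}
```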

This feature will significantly aid in diagnosing and resolving allocation-related OOM issues in  by providing clear insights into the failure pathways.
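
For context on the LD_PRELOAD items above: interposition works by exporting `malloc` from the preloaded library and resolving the next definition via `dlsym(RTLD_NEXT, ...)`. Below is a minimal sketch of the route-or-fall-back pattern; the allocator entry point `hak_tiny_alloc` and the 1024-byte tiny threshold are assumptions for illustration, not the project's real interface.

```c
/* Sketch of an LD_PRELOAD wrapper (illustrative; hak_tiny_alloc and the
 * tiny threshold are assumed names/values). Build as a shared object and
 * link with -ldl. */
#define _GNU_SOURCE
#include <dlfcn.h>
#include <stddef.h>

extern void* hak_tiny_alloc(size_t size);  /* hypothetical allocator entry */

static void* (*real_malloc)(size_t) = NULL;

void* malloc(size_t size)
{
    if (!real_malloc)
        real_malloc = (void* (*)(size_t))dlsym(RTLD_NEXT, "malloc");
    if (size <= 1024) {                    /* route tiny requests */
        void* p = hak_tiny_alloc(size);
        if (p) return p;                   /* custom allocator handled it */
    }
    return real_malloc(size);              /* fall back to libc */
}
```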
Author: Moe Charm (CI)
Date:   2025-12-01 16:37:59 +09:00
Parent: 2bd8da9267
Commit: 4ef0171bc0
85 changed files with 5930 additions and 479 deletions
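
The diff below migrates the TLS free-list ("TLS SLL") box from raw `void*` to the type-safe `hak_base_ptr_t` handle from `../hakmem_internal.h`, routing raw-pointer access through `HAK_BASE_TO_RAW`/`HAK_BASE_FROM_RAW` and the `hak_base_is_null`/`hak_base_eq` helpers. The handle's real definition is not shown in this diff; a minimal sketch of one plausible shape, assuming a struct wrapper:

```c
/* Plausible shape for hak_base_ptr_t (an assumption: the actual definition
 * in hakmem_internal.h may differ). Wrapping the BASE pointer in a struct
 * makes BASE/USER pointer mix-ups a compile-time type error. */
#include <stdbool.h>
#include <stddef.h>

typedef struct { void* p; } hak_base_ptr_t;

#define HAK_BASE_FROM_RAW(raw) ((hak_base_ptr_t){ (raw) })
#define HAK_BASE_TO_RAW(b)     ((b).p)

static inline bool hak_base_is_null(hak_base_ptr_t b) { return b.p == NULL; }

static inline bool hak_base_eq(hak_base_ptr_t a, hak_base_ptr_t b)
{
    return a.p == b.p;
}
```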

tls_sll_box.h

@@ -24,6 +24,7 @@
#include <stdlib.h>
#include <stdatomic.h>
#include "../hakmem_internal.h" // Phase 10: Type Safety (hak_base_ptr_t)
#include "../hakmem_tiny_config.h"
#include "../hakmem_build_flags.h"
#include "../hakmem_debug_master.h" // For unified debug level control
@@ -39,7 +40,7 @@
#include "tiny_header_box.h" // Header Box: Single Source of Truth for header operations
// Per-thread debug shadow: last successful push base per class (release-safe)
static __thread void* s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
static __thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
// Per-thread callsite tracking: last push caller per class (debug-only)
#if !HAKMEM_BUILD_RELEASE
@@ -63,18 +64,19 @@ static int g_tls_sll_push_line[TINY_NUM_CLASSES] = {0};
// ========== Debug guard ==========
#if !HAKMEM_BUILD_RELEASE
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
static inline void tls_sll_debug_guard(int class_idx, hak_base_ptr_t base, const char* where)
{
(void)class_idx;
if ((uintptr_t)base < 4096) {
void* raw = HAK_BASE_TO_RAW(base);
if ((uintptr_t)raw < 4096) {
fprintf(stderr,
"[TLS_SLL_GUARD] %s: suspicious ptr=%p cls=%d\n",
where, base, class_idx);
where, raw, class_idx);
abort();
}
}
#else
static inline void tls_sll_debug_guard(int class_idx, void* base, const char* where)
static inline void tls_sll_debug_guard(int class_idx, hak_base_ptr_t base, const char* where)
{
(void)class_idx; (void)base; (void)where;
}
@@ -82,25 +84,26 @@ static inline void tls_sll_debug_guard(int class_idx, void* base, const char* wh
// Normalize helper: callers are required to pass BASE already.
// Kept as a no-op for documentation / future hardening.
static inline void* tls_sll_normalize_base(int class_idx, void* node)
static inline hak_base_ptr_t tls_sll_normalize_base(int class_idx, hak_base_ptr_t node)
{
#if HAKMEM_TINY_HEADER_CLASSIDX
if (node && class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
if (!hak_base_is_null(node) && class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
extern const size_t g_tiny_class_sizes[];
size_t stride = g_tiny_class_sizes[class_idx];
void* raw = HAK_BASE_TO_RAW(node);
if (__builtin_expect(stride != 0, 1)) {
uintptr_t delta = (uintptr_t)node % stride;
uintptr_t delta = (uintptr_t)raw % stride;
if (__builtin_expect(delta == 1, 0)) {
// USER pointer passed in; normalize to BASE (= user-1) to avoid offset-1 writes.
void* base = (uint8_t*)node - 1;
void* base = (uint8_t*)raw - 1;
static _Atomic uint32_t g_tls_sll_norm_userptr = 0;
uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_norm_userptr, 1, memory_order_relaxed);
if (n < 8) {
fprintf(stderr,
"[TLS_SLL_NORMALIZE_USERPTR] cls=%d node=%p -> base=%p stride=%zu\n",
class_idx, node, base, stride);
class_idx, raw, base, stride);
}
return base;
return HAK_BASE_FROM_RAW(base);
}
}
}
@@ -146,13 +149,13 @@ static inline void tls_sll_dump_tls_window(int class_idx, const char* stage)
shot + 1,
stage ? stage : "(null)",
class_idx,
g_tls_sll[class_idx].head,
HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
g_tls_sll[class_idx].count,
s_tls_sll_last_push[class_idx],
HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
fprintf(stderr, " tls_sll snapshot (head/count):");
for (int c = 0; c < TINY_NUM_CLASSES; c++) {
fprintf(stderr, " C%d:%p/%u", c, g_tls_sll[c].head, g_tls_sll[c].count);
fprintf(stderr, " C%d:%p/%u", c, HAK_BASE_TO_RAW(g_tls_sll[c].head), g_tls_sll[c].count);
}
fprintf(stderr, " canary_before=%#llx canary_after=%#llx\n",
(unsigned long long)g_tls_canary_before_sll,
@@ -169,13 +172,13 @@ static inline void tls_sll_record_writer(int class_idx, const char* who)
}
}
static inline int tls_sll_head_valid(void* head)
static inline int tls_sll_head_valid(hak_base_ptr_t head)
{
uintptr_t a = (uintptr_t)head;
uintptr_t a = (uintptr_t)HAK_BASE_TO_RAW(head);
return (a >= 4096 && a <= 0x00007fffffffffffULL);
}
static inline void tls_sll_log_hdr_mismatch(int class_idx, void* base, uint8_t got, uint8_t expect, const char* stage)
static inline void tls_sll_log_hdr_mismatch(int class_idx, hak_base_ptr_t base, uint8_t got, uint8_t expect, const char* stage)
{
static _Atomic uint32_t g_hdr_mismatch_log = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_mismatch_log, 1, memory_order_relaxed);
@@ -184,13 +187,13 @@ static inline void tls_sll_log_hdr_mismatch(int class_idx, void* base, uint8_t g
"[TLS_SLL_HDR_MISMATCH] stage=%s cls=%d base=%p got=0x%02x expect=0x%02x\n",
stage ? stage : "(null)",
class_idx,
base,
HAK_BASE_TO_RAW(base),
got,
expect);
}
}
static inline void tls_sll_diag_next(int class_idx, void* base, void* next, const char* stage)
static inline void tls_sll_diag_next(int class_idx, hak_base_ptr_t base, hak_base_ptr_t next, const char* stage)
{
#if !HAKMEM_BUILD_RELEASE
static int s_diag_enable = -1;
@@ -203,18 +206,19 @@ static inline void tls_sll_diag_next(int class_idx, void* base, void* next, cons
// Narrow to target classes to preserve early shots
if (class_idx != 4 && class_idx != 6 && class_idx != 7) return;
void* raw_next = HAK_BASE_TO_RAW(next);
int in_range = tls_sll_head_valid(next);
if (in_range) {
// Range check (abort on clearly bad pointers to catch first offender)
validate_ptr_range(next, "tls_sll_pop_next_diag");
validate_ptr_range(raw_next, "tls_sll_pop_next_diag");
}
SuperSlab* ss = hak_super_lookup(next);
int slab_idx = ss ? slab_index_for(ss, next) : -1;
SuperSlab* ss = hak_super_lookup(raw_next);
int slab_idx = ss ? slab_index_for(ss, raw_next) : -1;
TinySlabMeta* meta = (ss && slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) ? &ss->slabs[slab_idx] : NULL;
int meta_cls = meta ? (int)meta->class_idx : -1;
#if HAKMEM_TINY_HEADER_CLASSIDX
int hdr_cls = next ? tiny_region_id_read_header((uint8_t*)next + 1) : -1;
int hdr_cls = raw_next ? tiny_region_id_read_header((uint8_t*)raw_next + 1) : -1;
#else
int hdr_cls = -1;
#endif
@@ -227,8 +231,8 @@ static inline void tls_sll_diag_next(int class_idx, void* base, void* next, cons
shot + 1,
stage ? stage : "(null)",
class_idx,
base,
next,
HAK_BASE_TO_RAW(base),
raw_next,
hdr_cls,
meta_cls,
slab_idx,
@@ -247,7 +251,7 @@ static inline void tls_sll_diag_next(int class_idx, void* base, void* next, cons
// Implementation function with callsite tracking (where).
// Use tls_sll_push() macro instead of calling directly.
static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity, const char* where)
static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t capacity, const char* where)
{
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push");
@@ -265,19 +269,20 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
const uint32_t kCapacityHardMax = (1u << 20);
const int unlimited = (capacity > kCapacityHardMax);
if (!ptr) {
if (hak_base_is_null(ptr)) {
return false;
}
// Base pointer only (callers must pass BASE; this is a no-op by design).
ptr = tls_sll_normalize_base(class_idx, ptr);
void* raw_ptr = HAK_BASE_TO_RAW(ptr);
// Detect meta/class mismatch on push (first few only).
do {
static _Atomic uint32_t g_tls_sll_push_meta_mis = 0;
struct SuperSlab* ss = hak_super_lookup(ptr);
struct SuperSlab* ss = hak_super_lookup(raw_ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
int sidx = slab_index_for(ss, ptr);
int sidx = slab_index_for(ss, raw_ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
uint8_t meta_cls = ss->slabs[sidx].class_idx;
if (meta_cls < TINY_NUM_CLASSES && meta_cls != (uint8_t)class_idx) {
@@ -285,7 +290,7 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
if (n < 4) {
fprintf(stderr,
"[TLS_SLL_PUSH_META_MISMATCH] cls=%d meta_cls=%u base=%p slab_idx=%d ss=%p\n",
class_idx, (unsigned)meta_cls, ptr, sidx, (void*)ss);
class_idx, (unsigned)meta_cls, raw_ptr, sidx, (void*)ss);
void* bt[8];
int frames = backtrace(bt, 8);
backtrace_symbols_fd(bt, frames, fileno(stderr));
@@ -312,14 +317,14 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
if (__builtin_expect(g_validate_hdr, 0)) {
static _Atomic uint32_t g_tls_sll_push_bad_hdr = 0;
uint8_t hdr = *(uint8_t*)ptr;
uint8_t hdr = *(uint8_t*)raw_ptr;
uint8_t expected = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
if (hdr != expected) {
uint32_t n = atomic_fetch_add_explicit(&g_tls_sll_push_bad_hdr, 1, memory_order_relaxed);
if (n < 10) {
fprintf(stderr,
"[TLS_SLL_PUSH_BAD_HDR] cls=%d base=%p got=0x%02x expect=0x%02x from=%s\n",
class_idx, ptr, hdr, expected, where ? where : "(null)");
class_idx, raw_ptr, hdr, expected, where ? where : "(null)");
void* bt[8];
int frames = backtrace(bt, 8);
backtrace_symbols_fd(bt, frames, fileno(stderr));
@@ -332,22 +337,22 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
#if !HAKMEM_BUILD_RELEASE
// Minimal range guard before we touch memory.
if (!validate_ptr_range(ptr, "tls_sll_push_base")) {
if (!validate_ptr_range(raw_ptr, "tls_sll_push_base")) {
fprintf(stderr,
"[TLS_SLL_PUSH] FATAL invalid BASE ptr cls=%d base=%p\n",
class_idx, ptr);
class_idx, raw_ptr);
abort();
}
#else
// Release: drop malformed ptrs but keep running.
uintptr_t ptr_addr = (uintptr_t)ptr;
uintptr_t ptr_addr = (uintptr_t)raw_ptr;
if (ptr_addr < 4096 || ptr_addr > 0x00007fffffffffffULL) {
extern _Atomic uint64_t g_tls_sll_invalid_push[];
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_push[class_idx], 1, memory_order_relaxed);
static __thread uint8_t s_log_limit_push[TINY_NUM_CLASSES] = {0};
if (s_log_limit_push[class_idx] < 4) {
fprintf(stderr, "[TLS_SLL_PUSH_INVALID] cls=%d base=%p dropped count=%llu\n",
class_idx, ptr, (unsigned long long)cnt + 1);
class_idx, raw_ptr, (unsigned long long)cnt + 1);
s_log_limit_push[class_idx]++;
}
return false;
@@ -375,7 +380,7 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
}
// ptr is BASE pointer, header is at ptr+0
uint8_t* b = (uint8_t*)ptr;
uint8_t* b = (uint8_t*)raw_ptr;
uint8_t got_pre, expected;
tiny_header_validate(b, class_idx, &got_pre, &expected);
if (__builtin_expect(got_pre != expected, 0)) {
@@ -388,7 +393,7 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
if (__builtin_expect(g_sll_ring_en, 0)) {
// aux encodes: high 8 bits = got, low 8 bits = expected
uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expected;
tiny_debug_ring_record(0x7F10 /*TLS_SLL_REJECT*/, (uint16_t)class_idx, ptr, aux);
tiny_debug_ring_record(0x7F10 /*TLS_SLL_REJECT*/, (uint16_t)class_idx, raw_ptr, aux);
}
return false;
}
@@ -405,21 +410,21 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
// Optional double-free detection: scan a bounded prefix of the list.
// Increased from 64 to 256 to catch orphaned blocks deeper in the chain.
{
void* scan = g_tls_sll[class_idx].head;
hak_base_ptr_t scan = g_tls_sll[class_idx].head;
uint32_t scanned = 0;
const uint32_t limit = (g_tls_sll[class_idx].count < 256)
? g_tls_sll[class_idx].count
: 256;
while (scan && scanned < limit) {
if (scan == ptr) {
while (!hak_base_is_null(scan) && scanned < limit) {
if (hak_base_eq(scan, ptr)) {
fprintf(stderr,
"[TLS_SLL_PUSH_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u last_push=%p last_push_from=%s last_pop_from=%s last_writer=%s where=%s\n",
class_idx,
ptr,
g_tls_sll[class_idx].head,
raw_ptr,
HAK_BASE_TO_RAW(g_tls_sll[class_idx].head),
g_tls_sll[class_idx].count,
scanned,
s_tls_sll_last_push[class_idx],
HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]),
s_tls_sll_last_push_from[class_idx] ? s_tls_sll_last_push_from[class_idx] : "(null)",
s_tls_sll_last_pop_from[class_idx] ? s_tls_sll_last_pop_from[class_idx] : "(null)",
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)",
@@ -428,16 +433,17 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
// ABORT to get backtrace showing exact double-free location
abort();
}
void* next;
PTR_NEXT_READ("tls_sll_scan", class_idx, scan, 0, next);
scan = next;
void* next_raw;
PTR_NEXT_READ("tls_sll_scan", class_idx, HAK_BASE_TO_RAW(scan), 0, next_raw);
scan = HAK_BASE_FROM_RAW(next_raw);
scanned++;
}
}
#endif
// Link new node to current head via Box API (offset is handled inside tiny_nextptr).
PTR_NEXT_WRITE("tls_push", class_idx, ptr, 0, g_tls_sll[class_idx].head);
// Note: g_tls_sll[...].head is hak_base_ptr_t, but PTR_NEXT_WRITE takes void* val.
PTR_NEXT_WRITE("tls_push", class_idx, raw_ptr, 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head));
g_tls_sll[class_idx].head = ptr;
tls_sll_record_writer(class_idx, "push");
g_tls_sll[class_idx].count = cur + 1;
@@ -450,7 +456,7 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
const char* file, int line);
extern _Atomic uint64_t g_ptr_trace_op_counter;
uint64_t _trace_op = atomic_fetch_add_explicit(&g_ptr_trace_op_counter, 1, memory_order_relaxed);
ptr_trace_record_impl(4 /*PTR_EVENT_FREE_TLS_PUSH*/, ptr, class_idx, _trace_op,
ptr_trace_record_impl(4 /*PTR_EVENT_FREE_TLS_PUSH*/, raw_ptr, class_idx, _trace_op,
NULL, g_tls_sll[class_idx].count, 0,
where ? where : __FILE__, __LINE__);
#endif
@@ -473,7 +479,7 @@ static inline bool tls_sll_push_impl(int class_idx, void* ptr, uint32_t capacity
// Implementation function with callsite tracking (where).
// Use tls_sll_pop() macro instead of calling directly.
static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where)
static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const char* where)
{
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_pop");
// Class mask gate: if disallowed, behave as empty
@@ -482,14 +488,15 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
}
atomic_fetch_add(&g_integrity_check_class_bounds, 1);
void* base = g_tls_sll[class_idx].head;
if (!base) {
hak_base_ptr_t base = g_tls_sll[class_idx].head;
if (hak_base_is_null(base)) {
return false;
}
void* raw_base = HAK_BASE_TO_RAW(base);
// Sentinel guard: remote sentinel must never be in TLS SLL.
if (__builtin_expect((uintptr_t)base == TINY_REMOTE_SENTINEL, 0)) {
g_tls_sll[class_idx].head = NULL;
if (__builtin_expect((uintptr_t)raw_base == TINY_REMOTE_SENTINEL, 0)) {
g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
g_tls_sll[class_idx].count = 0;
tls_sll_record_writer(class_idx, "pop_sentinel_reset");
#if !HAKMEM_BUILD_RELEASE
@@ -504,38 +511,38 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
g_sll_ring_en = (r && *r && *r != '0') ? 1 : 0;
}
if (__builtin_expect(g_sll_ring_en, 0)) {
tiny_debug_ring_record(0x7F11 /*TLS_SLL_SENTINEL*/, (uint16_t)class_idx, base, 0);
tiny_debug_ring_record(0x7F11 /*TLS_SLL_SENTINEL*/, (uint16_t)class_idx, raw_base, 0);
}
}
return false;
}
#if !HAKMEM_BUILD_RELEASE
if (!validate_ptr_range(base, "tls_sll_pop_base")) {
if (!validate_ptr_range(raw_base, "tls_sll_pop_base")) {
fprintf(stderr,
"[TLS_SLL_POP] FATAL invalid BASE ptr cls=%d base=%p\n",
class_idx, base);
class_idx, raw_base);
abort();
}
#else
// Fail-fast even in release: drop malformed TLS head to avoid SEGV on bad base.
uintptr_t base_addr = (uintptr_t)base;
uintptr_t base_addr = (uintptr_t)raw_base;
if (base_addr < 4096 || base_addr > 0x00007fffffffffffULL) {
extern _Atomic uint64_t g_tls_sll_invalid_head[];
uint64_t cnt = atomic_fetch_add_explicit(&g_tls_sll_invalid_head[class_idx], 1, memory_order_relaxed);
static __thread uint8_t s_log_limit[TINY_NUM_CLASSES] = {0};
if (s_log_limit[class_idx] < 4) {
fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d head=%p dropped count=%llu\n",
class_idx, base, (unsigned long long)cnt + 1);
class_idx, raw_base, (unsigned long long)cnt + 1);
s_log_limit[class_idx]++;
}
// Help triage: show last successful push base for this thread/class
if (s_tls_sll_last_push[class_idx] && s_log_limit[class_idx] <= 4) {
if (!hak_base_is_null(s_tls_sll_last_push[class_idx]) && s_log_limit[class_idx] <= 4) {
fprintf(stderr, "[TLS_SLL_POP_INVALID] cls=%d last_push=%p\n",
class_idx, s_tls_sll_last_push[class_idx]);
class_idx, HAK_BASE_TO_RAW(s_tls_sll_last_push[class_idx]));
}
tls_sll_dump_tls_window(class_idx, "head_range");
g_tls_sll[class_idx].head = NULL;
g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
g_tls_sll[class_idx].count = 0;
tls_sll_record_writer(class_idx, "pop_invalid_head");
return false;
@@ -559,14 +566,14 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
// Header validation using Header Box (C1-C6 only; C0/C7 skip)
if (tiny_class_preserves_header(class_idx)) {
uint8_t got, expect;
PTR_TRACK_TLS_POP(base, class_idx);
bool valid = tiny_header_validate(base, class_idx, &got, &expect);
PTR_TRACK_HEADER_READ(base, got);
PTR_TRACK_TLS_POP(raw_base, class_idx);
bool valid = tiny_header_validate(raw_base, class_idx, &got, &expect);
PTR_TRACK_HEADER_READ(raw_base, got);
if (__builtin_expect(!valid, 0)) {
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[TLS_SLL_POP] CORRUPTED HEADER cls=%d base=%p got=0x%02x expect=0x%02x\n",
class_idx, base, got, expect);
class_idx, raw_base, got, expect);
ptr_trace_dump_now("header_corruption");
abort();
#else
@@ -576,9 +583,9 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
uint64_t cnt = atomic_fetch_add_explicit(&g_hdr_reset_count, 1, memory_order_relaxed);
if (cnt % 10000 == 0) {
fprintf(stderr, "[TLS_SLL_HDR_RESET] cls=%d base=%p got=0x%02x expect=0x%02x count=%llu\n",
class_idx, base, got, expect, (unsigned long long)cnt);
class_idx, raw_base, got, expect, (unsigned long long)cnt);
}
g_tls_sll[class_idx].head = NULL;
g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
g_tls_sll[class_idx].count = 0;
tls_sll_record_writer(class_idx, "header_reset");
{
@@ -590,7 +597,7 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
if (__builtin_expect(g_sll_ring_en, 0)) {
// aux encodes: high 8 bits = got, low 8 bits = expect
uintptr_t aux = ((uintptr_t)got << 8) | (uintptr_t)expect;
tiny_debug_ring_record(0x7F12 /*TLS_SLL_HDR_CORRUPT*/, (uint16_t)class_idx, base, aux);
tiny_debug_ring_record(0x7F12 /*TLS_SLL_HDR_CORRUPT*/, (uint16_t)class_idx, raw_base, aux);
}
}
return false;
@@ -599,15 +606,16 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
}
// Read next via Box API.
void* next;
PTR_NEXT_READ("tls_pop", class_idx, base, 0, next);
void* raw_next;
PTR_NEXT_READ("tls_pop", class_idx, raw_base, 0, raw_next);
hak_base_ptr_t next = HAK_BASE_FROM_RAW(raw_next);
tls_sll_diag_next(class_idx, base, next, "pop_next");
#if !HAKMEM_BUILD_RELEASE
if (next && !validate_ptr_range(next, "tls_sll_pop_next")) {
if (!hak_base_is_null(next) && !validate_ptr_range(raw_next, "tls_sll_pop_next")) {
fprintf(stderr,
"[TLS_SLL_POP] FATAL invalid next ptr cls=%d base=%p next=%p\n",
class_idx, base, next);
class_idx, raw_base, raw_next);
ptr_trace_dump_now("next_corruption");
abort();
}
@@ -615,13 +623,13 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
g_tls_sll[class_idx].head = next;
tls_sll_record_writer(class_idx, "pop");
if ((class_idx == 4 || class_idx == 6) && next && !tls_sll_head_valid(next)) {
if ((class_idx == 4 || class_idx == 6) && !hak_base_is_null(next) && !tls_sll_head_valid(next)) {
fprintf(stderr, "[TLS_SLL_POP_POST_INVALID] cls=%d next=%p last_writer=%s\n",
class_idx,
next,
raw_next,
g_tls_sll_last_writer[class_idx] ? g_tls_sll_last_writer[class_idx] : "(null)");
tls_sll_dump_tls_window(class_idx, "pop_post");
g_tls_sll[class_idx].head = NULL;
g_tls_sll[class_idx].head = HAK_BASE_FROM_RAW(NULL);
g_tls_sll[class_idx].count = 0;
return false;
}
@@ -630,7 +638,7 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
}
// Clear next inside popped node to avoid stale-chain issues.
tiny_next_write(class_idx, base, NULL);
tiny_next_write(class_idx, raw_base, NULL);
#if !HAKMEM_BUILD_RELEASE
// Trace TLS SLL pop (debug only)
@@ -639,7 +647,7 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
const char* file, int line);
extern _Atomic uint64_t g_ptr_trace_op_counter;
uint64_t _trace_op = atomic_fetch_add_explicit(&g_ptr_trace_op_counter, 1, memory_order_relaxed);
ptr_trace_record_impl(3 /*PTR_EVENT_ALLOC_TLS_POP*/, base, class_idx, _trace_op,
ptr_trace_record_impl(3 /*PTR_EVENT_ALLOC_TLS_POP*/, raw_base, class_idx, _trace_op,
NULL, g_tls_sll[class_idx].count + 1, 0,
where ? where : __FILE__, __LINE__);
@@ -652,7 +660,7 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
uint64_t op = atomic_load(&g_debug_op_count);
if (op < 50 && class_idx == 1) {
fprintf(stderr, "[OP#%04lu POP] cls=%d base=%p tls_count_after=%u\n",
(unsigned long)op, class_idx, base,
(unsigned long)op, class_idx, raw_base,
g_tls_sll[class_idx].count);
fflush(stderr);
}
@@ -672,13 +680,13 @@ static inline bool tls_sll_pop_impl(int class_idx, void** out, const char* where
// Returns number of nodes actually moved (<= capacity remaining).
static inline uint32_t tls_sll_splice(int class_idx,
void* chain_head,
hak_base_ptr_t chain_head,
uint32_t count,
uint32_t capacity)
{
HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_splice");
if (!chain_head || count == 0 || capacity == 0) {
if (hak_base_is_null(chain_head) || count == 0 || capacity == 0) {
return 0;
}
@@ -691,35 +699,37 @@ static inline uint32_t tls_sll_splice(int class_idx,
uint32_t to_move = (count < room) ? count : room;
// Traverse chain up to to_move, validate, and find tail.
void* tail = chain_head;
hak_base_ptr_t tail = chain_head;
uint32_t moved = 1;
tls_sll_debug_guard(class_idx, chain_head, "splice_head");
// Restore header defensively on each node we touch (C1-C6 only; C0/C7 skip)
tiny_header_write_if_preserved(chain_head, class_idx);
tiny_header_write_if_preserved(HAK_BASE_TO_RAW(chain_head), class_idx);
while (moved < to_move) {
tls_sll_debug_guard(class_idx, tail, "splice_traverse");
void* next;
PTR_NEXT_READ("tls_splice_trav", class_idx, tail, 0, next);
if (next && !tls_sll_head_valid(next)) {
void* raw_next;
PTR_NEXT_READ("tls_splice_trav", class_idx, HAK_BASE_TO_RAW(tail), 0, raw_next);
hak_base_ptr_t next = HAK_BASE_FROM_RAW(raw_next);
if (!hak_base_is_null(next) && !tls_sll_head_valid(next)) {
static _Atomic uint32_t g_splice_diag = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_splice_diag, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[TLS_SLL_SPLICE_INVALID_NEXT] cls=%d head=%p tail=%p next=%p moved=%u/%u\n",
class_idx, chain_head, tail, next, moved, to_move);
class_idx, HAK_BASE_TO_RAW(chain_head), HAK_BASE_TO_RAW(tail), raw_next, moved, to_move);
}
}
if (!next) {
if (hak_base_is_null(next)) {
break;
}
// Restore header on each traversed node (C1-C6 only; C0/C7 skip)
tiny_header_write_if_preserved(next, class_idx);
tiny_header_write_if_preserved(raw_next, class_idx);
tail = next;
moved++;
@@ -727,7 +737,7 @@ static inline uint32_t tls_sll_splice(int class_idx,
// Link tail to existing head and install new head.
tls_sll_debug_guard(class_idx, tail, "splice_tail");
PTR_NEXT_WRITE("tls_splice_link", class_idx, tail, 0, g_tls_sll[class_idx].head);
PTR_NEXT_WRITE("tls_splice_link", class_idx, HAK_BASE_TO_RAW(tail), 0, HAK_BASE_TO_RAW(g_tls_sll[class_idx].head));
g_tls_sll[class_idx].head = chain_head;
tls_sll_record_writer(class_idx, "splice");
@@ -742,22 +752,22 @@ static inline uint32_t tls_sll_splice(int class_idx,
// No changes required to call sites.
#if !HAKMEM_BUILD_RELEASE
static inline bool tls_sll_push_guarded(int class_idx, void* ptr, uint32_t capacity,
static inline bool tls_sll_push_guarded(int class_idx, hak_base_ptr_t ptr, uint32_t capacity,
const char* where, const char* file, int line) {
// Enhanced duplicate guard (scan up to 256 nodes for deep duplicates)
uint32_t scanned = 0;
void* cur = g_tls_sll[class_idx].head;
hak_base_ptr_t cur = g_tls_sll[class_idx].head;
const uint32_t limit = (g_tls_sll[class_idx].count < 256) ? g_tls_sll[class_idx].count : 256;
while (cur && scanned < limit) {
if (cur == ptr) {
while (!hak_base_is_null(cur) && scanned < limit) {
if (hak_base_eq(cur, ptr)) {
// Enhanced error message with both old and new callsite info
const char* last_file = g_tls_sll_push_file[class_idx] ? g_tls_sll_push_file[class_idx] : "(null)";
fprintf(stderr,
"[TLS_SLL_DUP] cls=%d ptr=%p head=%p count=%u scanned=%u\n"
" Current push: where=%s at %s:%d\n"
" Previous push: %s:%d\n",
class_idx, ptr, g_tls_sll[class_idx].head, g_tls_sll[class_idx].count, scanned,
class_idx, HAK_BASE_TO_RAW(ptr), HAK_BASE_TO_RAW(g_tls_sll[class_idx].head), g_tls_sll[class_idx].count, scanned,
where, file, line,
last_file, g_tls_sll_push_line[class_idx]);
@@ -765,9 +775,9 @@ static inline bool tls_sll_push_guarded(int class_idx, void* ptr, uint32_t capac
ptr_trace_dump_now("tls_sll_dup");
abort();
}
void* next = NULL;
PTR_NEXT_READ("tls_sll_dupcheck", class_idx, cur, 0, next);
cur = next;
void* raw_next = NULL;
PTR_NEXT_READ("tls_sll_dupcheck", class_idx, HAK_BASE_TO_RAW(cur), 0, raw_next);
cur = HAK_BASE_FROM_RAW(raw_next);
scanned++;
}
@@ -792,4 +802,4 @@ static inline bool tls_sll_push_guarded(int class_idx, void* ptr, uint32_t capac
tls_sll_pop_impl((cls), (out), NULL)
#endif
#endif // TLS_SLL_BOX_H
#endif // TLS_SLL_BOX_H