Fix cross-thread ownership check: Use bits 8-15 for owner_tid_low
Problem:
- TLS_SLL_PUSH_DUP crash in the Larson multi-threaded benchmark
- Cross-thread frees were incorrectly routed to the same-thread TLS path
- Root cause: pthread_t on glibc is 256-byte aligned (TCB base), so the lower 8 bits are ALWAYS 0x00 for ALL threads

Fix:
- Change owner_tid_low from (tid & 0xFF) to ((tid >> 8) & 0xFF)
- Bits 8-15 actually vary between threads, enabling correct detection
- Applied consistently across all ownership check locations:
  - superslab_inline.h: ss_owner_try_acquire/release/is_mine
  - slab_handle.h: slab_try_acquire
  - tiny_free_fast.inc.h: tiny_free_is_same_thread_ss
  - tiny_free_fast_v2.inc.h: cross-thread detection
  - tiny_superslab_free.inc.h: same-thread check
  - ss_allocation_box.c: slab initialization
  - hakmem_tiny_superslab.c: ownership handling

Also added:
- Address watcher debug infrastructure (tiny_region_id.h)
- Cross-thread detection in malloc_tiny_fast.h

Front Gate Test results:
- Larson 1T/2T/4T: PASS (no TLS_SLL_PUSH_DUP crash)
- random_mixed: PASS
- Performance: ~20M ops/s (regression from 48M, needs optimization)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -40,9 +40,12 @@ extern int g_tls_sll_enable; // Honored for fast free: when 0, fall back to slo
|
||||
extern void hak_tiny_free(void* ptr); // Fallback for non-header allocations
|
||||
|
||||
// Current thread's identity, truncated to its low 32 bits.
// The TINY_SELF_U32_LOCAL_DEFINED macro keeps this helper from being
// defined a second time within the same translation unit.
#ifndef TINY_SELF_U32_LOCAL_DEFINED
#define TINY_SELF_U32_LOCAL_DEFINED
static inline uint32_t tiny_self_u32_local(void) {
    // pthread_self() is converted to an integer via uintptr_t first,
    // then narrowed to 32 bits for the return value.
    const uintptr_t self_bits = (uintptr_t)pthread_self();
    return (uint32_t)self_bits;
}
#endif
|
||||
|
||||
// ========== Ultra-Fast Free (Header-based) ==========
|
||||
|
||||
@@ -198,8 +201,9 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
uint32_t self_tid = tiny_self_u32_local();
|
||||
uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
|
||||
|
||||
// Check if this is a cross-thread free (lower 8 bits mismatch)
|
||||
if (__builtin_expect((owner_tid_low & 0xFF) != (self_tid & 0xFF), 0)) {
|
||||
// Check if this is a cross-thread free (compare bits 8-15; low 8 bits are 0 on glibc)
|
||||
uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
|
||||
if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
|
||||
// Cross-thread free → remote queue routing
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
|
||||
@@ -220,12 +224,50 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug: Log free operations (first 50, class 1 only)
|
||||
// Address watcher: Check if this is the watched address being freed
|
||||
{
|
||||
extern uintptr_t get_watch_addr(void);
|
||||
uintptr_t watch = get_watch_addr();
|
||||
if (watch != 0 && (uintptr_t)base == watch) {
|
||||
extern _Atomic uint64_t g_debug_op_count;
|
||||
extern __thread TinyTLSSLL g_tls_sll[];
|
||||
uint64_t op = atomic_load(&g_debug_op_count);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "========================================\n");
|
||||
fprintf(stderr, "[WATCH_FREE_HIT] Address %p freed!\n", base);
|
||||
fprintf(stderr, "========================================\n");
|
||||
fprintf(stderr, " Operation: #%lu\n", (unsigned long)op);
|
||||
fprintf(stderr, " Class: %d\n", class_idx);
|
||||
fprintf(stderr, " User ptr: %p\n", ptr);
|
||||
fprintf(stderr, " Base ptr: %p\n", base);
|
||||
fprintf(stderr, " TLS count: %u (before free)\n", g_tls_sll[class_idx].count);
|
||||
fprintf(stderr, " TLS head: %p\n", g_tls_sll[class_idx].head);
|
||||
fprintf(stderr, "========================================\n");
|
||||
fprintf(stderr, "\n");
|
||||
fflush(stderr);
|
||||
|
||||
// Print backtrace
|
||||
void* bt[16];
|
||||
int frames = backtrace(bt, 16);
|
||||
fprintf(stderr, "[WATCH_FREE_BACKTRACE] %d frames:\n", frames);
|
||||
backtrace_symbols_fd(bt, frames, fileno(stderr));
|
||||
fprintf(stderr, "\n");
|
||||
fflush(stderr);
|
||||
|
||||
// Abort to preserve state
|
||||
fprintf(stderr, "[WATCH_ABORT] Aborting on watched free...\n");
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// Debug: Log free operations (first 2000, ALL classes)
|
||||
{
|
||||
extern _Atomic uint64_t g_debug_op_count;
|
||||
extern __thread TinyTLSSLL g_tls_sll[];
|
||||
uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
|
||||
if (op < 50 && class_idx == 1) {
|
||||
if (op < 2000) { // ALL classes, not just class 1
|
||||
fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p tls_count_before=%u\n",
|
||||
(unsigned long)op, class_idx, ptr, base,
|
||||
g_tls_sll[class_idx].count);
|
||||
|
||||
Reference in New Issue
Block a user