diff --git a/core/box/ss_allocation_box.c b/core/box/ss_allocation_box.c
index 38eed400..08812da4 100644
--- a/core/box/ss_allocation_box.c
+++ b/core/box/ss_allocation_box.c
@@ -415,7 +415,8 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->used = 0;
     meta->capacity = capacity;
     meta->carved = 0;
-    meta->owner_tid_low = (uint8_t)(owner_tid & 0xFFu);
+    // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
+    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
     // Fail-safe: stamp class_idx from geometry (stride → class).
     // This normalizes both legacy and shared pool paths.
     for (int i = 0; i < TINY_NUM_CLASSES; i++) {
diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h
index c18b439e..8db1c9ba 100644
--- a/core/front/malloc_tiny_fast.h
+++ b/core/front/malloc_tiny_fast.h
@@ -24,12 +24,25 @@
 #include
 #include
 #include
+#include <pthread.h> // For pthread_self() in cross-thread check
 #include "../hakmem_build_flags.h"
 #include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
+#include "../hakmem_super_registry.h" // For cross-thread owner check
+#include "../superslab/superslab_inline.h" // For slab_index_for
+#include "../box/ss_slab_meta_box.h" // For ss_slab_meta_owner_tid_low_get
+#include "../box/free_remote_box.h" // For tiny_free_remote_box
 #include "tiny_unified_cache.h" // For unified_cache_pop_or_refill
 #include "../tiny_region_id.h" // For tiny_region_id_write_header
 #include "../hakmem_tiny.h" // For hak_tiny_size_to_class
 
+// Helper: current thread id (low 32 bits) for owner check
+#ifndef TINY_SELF_U32_LOCAL_DEFINED
+#define TINY_SELF_U32_LOCAL_DEFINED
+static inline uint32_t tiny_self_u32_local(void) {
+    return (uint32_t)(uintptr_t)pthread_self();
+}
+#endif
+
 // ============================================================================
 // ENV Control (cached, lazy init)
 // ============================================================================
@@ -132,6 +145,76 @@ static inline int free_tiny_fast(void* ptr) {
     // 4. Compute BASE and push it to the Unified Cache
     void* base = (void*)((char*)ptr - 1);
+
+    // Cross-thread free detection (Larson MT crash fix, ENV gated)
+    {
+        static __thread int g_larson_fix = -1;
+        if (__builtin_expect(g_larson_fix == -1, 0)) {
+            const char* e = getenv("HAKMEM_TINY_LARSON_FIX");
+            g_larson_fix = (e && *e && *e != '0') ? 1 : 0;
+#if !HAKMEM_BUILD_RELEASE
+            fprintf(stderr, "[LARSON_FIX_INIT] g_larson_fix=%d (env=%s)\n", g_larson_fix, e ? e : "NULL");
+            fflush(stderr);
+#endif
+        }
+
+        if (__builtin_expect(g_larson_fix, 0)) {
+            SuperSlab* ss = hak_super_lookup(base);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
+                int slab_idx = slab_index_for(ss, base);
+                if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
+                    uint32_t self_tid = tiny_self_u32_local();
+                    uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
+                    // LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
+                    uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
+#if !HAKMEM_BUILD_RELEASE
+                    static _Atomic uint64_t g_owner_check_count = 0;
+                    uint64_t oc = atomic_fetch_add(&g_owner_check_count, 1);
+                    if (oc < 10) {
+                        fprintf(stderr, "[LARSON_FIX] Owner check: ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x match=%d\n",
+                                ptr, owner_tid_low, self_tid_cmp, self_tid, (owner_tid_low == self_tid_cmp));
+                        fflush(stderr);
+                    }
+#endif
+
+                    if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
+                        // Cross-thread free → route to remote queue instead of poisoning TLS cache
+#if !HAKMEM_BUILD_RELEASE
+                        static _Atomic uint64_t g_cross_thread_count = 0;
+                        uint64_t ct = atomic_fetch_add(&g_cross_thread_count, 1);
+                        if (ct < 20) {
+                            fprintf(stderr, "[LARSON_FIX] Cross-thread free detected! ptr=%p owner_tid_low=0x%02x self_tid_cmp=0x%02x self_tid=0x%08x\n",
+                                    ptr, owner_tid_low, self_tid_cmp, self_tid);
+                            fflush(stderr);
+                        }
+#endif
+                        TinySlabMeta* meta = &ss->slabs[slab_idx];
+                        if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
+                            return 1; // handled via remote queue
+                        }
+                        return 0; // remote push failed; fall back to normal path
+                    }
+                }
+            }
+        }
+    }
+
+    // Debug: Log free operations (first 5000, all classes)
+#if !HAKMEM_BUILD_RELEASE
+    {
+        extern _Atomic uint64_t g_debug_op_count;
+        extern __thread TinyTLSSLL g_tls_sll[];
+        uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
+        // Note: Shares g_debug_op_count with alloc logging, so bump the window.
+        if (op < 5000) {
+            fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p from=free_tiny_fast tls_count_before=%u\n",
+                    (unsigned long)op, class_idx, ptr, base,
+                    g_tls_sll[class_idx].count);
+            fflush(stderr);
+        }
+    }
+#endif
+
     int pushed = unified_cache_push(class_idx, base);
     if (__builtin_expect(pushed, 1)) {
         return 1; // Success
diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c
index a537ed97..19fa35cf 100644
--- a/core/hakmem_tiny_superslab.c
+++ b/core/hakmem_tiny_superslab.c
@@ -595,8 +595,9 @@ static void* hak_tiny_alloc_superslab_backend_shared(int class_idx)
     // Initialize slab geometry once for this class.
     if (meta->capacity == 0) {
         size_t block_size = g_tiny_class_sizes[class_idx];
-        // owner_tid_low is advisory; we can use 0 in this backend.
-        superslab_init_slab(ss, slab_idx, block_size, 0);
+        // LARSON FIX: Pass actual thread ID for cross-thread free detection
+        uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self();
+        superslab_init_slab(ss, slab_idx, block_size, my_tid);
         meta = &ss->slabs[slab_idx];
 
         // CRITICAL FIX: Always set class_idx after init to avoid C0/C7 confusion.
@@ -1195,7 +1196,8 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
     meta->used = 0;
     meta->capacity = capacity;
     meta->carved = 0;
-    meta->owner_tid_low = (uint8_t)(owner_tid & 0xFFu);
+    // LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
+    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
     // Fail-safe: stamp class_idx from geometry (stride → class).
     // This ensures legacy/shared/legacy-refill paths all end with a correct class.
     for (int i = 0; i < TINY_NUM_CLASSES; i++) {
diff --git a/core/slab_handle.h b/core/slab_handle.h
index b0c8f40a..1dde81e2 100644
--- a/core/slab_handle.h
+++ b/core/slab_handle.h
@@ -53,7 +53,7 @@ static inline SlabHandle slab_try_acquire(SuperSlab* ss, int idx, uint32_t tid)
     h.ss = ss;
     h.meta = m;
     h.slab_idx = (uint8_t)idx;
-    h.owner_tid_low = (uint8_t)tid;
+    h.owner_tid_low = (uint8_t)((tid >> 8) & 0xFFu);
     if (__builtin_expect(g_debug_remote_guard, 0)) {
         uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
         if (cur != h.owner_tid_low || cur == 0) {
diff --git a/core/superslab/superslab_inline.h b/core/superslab/superslab_inline.h
index 54cd64b7..28c6a8d7 100644
--- a/core/superslab/superslab_inline.h
+++ b/core/superslab/superslab_inline.h
@@ -89,7 +89,7 @@ static inline void superslab_ref_dec(SuperSlab* ss)
 static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t tid)
 {
     if (!m) return 0;
-    uint8_t want = (uint8_t)(tid & 0xFFu);
+    uint8_t want = (uint8_t)((tid >> 8) & 0xFFu);
     uint8_t expected = 0;
     return __atomic_compare_exchange_n(&m->owner_tid_low, &expected, want,
                                        false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
@@ -98,7 +98,7 @@ static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t tid)
 static inline void ss_owner_release(TinySlabMeta* m, uint32_t tid)
 {
     if (!m) return;
-    uint8_t expected = (uint8_t)(tid & 0xFFu);
+    uint8_t expected = (uint8_t)((tid >> 8) & 0xFFu);
     (void)__atomic_compare_exchange_n(&m->owner_tid_low, &expected, 0u,
                                       false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
 }
@@ -107,7 +107,7 @@ static inline int ss_owner_is_mine(TinySlabMeta* m, uint32_t tid)
 {
     if (!m) return 0;
     uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
-    return cur == (uint8_t)(tid & 0xFFu);
+    return cur == (uint8_t)((tid >> 8) & 0xFFu);
 }
 
 // Active block accounting (saturating dec by 1)
diff --git a/core/tiny_free_fast.inc.h b/core/tiny_free_fast.inc.h
index d4957e08..d1f9ac49 100644
--- a/core/tiny_free_fast.inc.h
+++ b/core/tiny_free_fast.inc.h
@@ -57,7 +57,7 @@ extern void tiny_alloc_fast_push(int class_idx, void* ptr);
 // Invariant: This check MUST be atomic (no TOCTOU between check and push)
 static inline int tiny_free_is_same_thread_ss(SuperSlab* ss, int slab_idx, uint32_t my_tid) {
     TinySlabMeta* meta = &ss->slabs[slab_idx];
-    uint8_t my_tid_low = (uint8_t)my_tid;
+    uint8_t my_tid_low = (uint8_t)((my_tid >> 8) & 0xFFu);
     uint8_t owner = tiny_atomic_load_u8_relaxed(&meta->owner_tid_low);
     return (owner == my_tid_low && owner != 0);
 }
diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h
index 7513fbcd..6fdaf126 100644
--- a/core/tiny_free_fast_v2.inc.h
+++ b/core/tiny_free_fast_v2.inc.h
@@ -40,9 +40,12 @@ extern int g_tls_sll_enable; // Honored for fast free: when 0, fall back to slo
 extern void hak_tiny_free(void* ptr); // Fallback for non-header allocations
 
 // Inline helper: Get current thread ID (lower 32 bits)
+#ifndef TINY_SELF_U32_LOCAL_DEFINED
+#define TINY_SELF_U32_LOCAL_DEFINED
 static inline uint32_t tiny_self_u32_local(void) {
     return (uint32_t)(uintptr_t)pthread_self();
 }
+#endif
 
 // ========== Ultra-Fast Free (Header-based) ==========
 
@@ -198,8 +201,9 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
     uint32_t self_tid = tiny_self_u32_local();
     uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
 
-    // Check if this is a cross-thread free (lower 8 bits mismatch)
-    if (__builtin_expect((owner_tid_low & 0xFF) != (self_tid & 0xFF), 0)) {
+    // Check if this is a cross-thread free (compare bits 8-15; low 8 bits are 0 on glibc)
+    uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
+    if (__builtin_expect(owner_tid_low != self_tid_cmp, 0)) {
         // Cross-thread free → remote queue routing
         TinySlabMeta* meta = &ss->slabs[slab_idx];
         if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
@@ -220,12 +224,50 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
     // Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs
 
 #if !HAKMEM_BUILD_RELEASE
-    // Debug: Log free operations (first 50, class 1 only)
+    // Address watcher: Check if this is the watched address being freed
+    {
+        extern uintptr_t get_watch_addr(void);
+        uintptr_t watch = get_watch_addr();
+        if (watch != 0 && (uintptr_t)base == watch) {
+            extern _Atomic uint64_t g_debug_op_count;
+            extern __thread TinyTLSSLL g_tls_sll[];
+            uint64_t op = atomic_load(&g_debug_op_count);
+
+            fprintf(stderr, "\n");
+            fprintf(stderr, "========================================\n");
+            fprintf(stderr, "[WATCH_FREE_HIT] Address %p freed!\n", base);
+            fprintf(stderr, "========================================\n");
+            fprintf(stderr, "  Operation: #%lu\n", (unsigned long)op);
+            fprintf(stderr, "  Class:     %d\n", class_idx);
+            fprintf(stderr, "  User ptr:  %p\n", ptr);
+            fprintf(stderr, "  Base ptr:  %p\n", base);
+            fprintf(stderr, "  TLS count: %u (before free)\n", g_tls_sll[class_idx].count);
+            fprintf(stderr, "  TLS head:  %p\n", g_tls_sll[class_idx].head);
+            fprintf(stderr, "========================================\n");
+            fprintf(stderr, "\n");
+            fflush(stderr);
+
+            // Print backtrace
+            void* bt[16];
+            int frames = backtrace(bt, 16);
+            fprintf(stderr, "[WATCH_FREE_BACKTRACE] %d frames:\n", frames);
+            backtrace_symbols_fd(bt, frames, fileno(stderr));
+            fprintf(stderr, "\n");
+            fflush(stderr);
+
+            // Abort to preserve state
+            fprintf(stderr, "[WATCH_ABORT] Aborting on watched free...\n");
+            fflush(stderr);
+            abort();
+        }
+    }
+
+    // Debug: Log free operations (first 2000, ALL classes)
     {
         extern _Atomic uint64_t g_debug_op_count;
         extern __thread TinyTLSSLL g_tls_sll[];
         uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
-        if (op < 50 && class_idx == 1) {
+        if (op < 2000) { // ALL classes, not just class 1
             fprintf(stderr, "[OP#%04lu FREE] cls=%d ptr=%p base=%p tls_count_before=%u\n",
                     (unsigned long)op, class_idx, ptr, base,
                     g_tls_sll[class_idx].count);
diff --git a/core/tiny_region_id.h b/core/tiny_region_id.h
index 6027544d..28c58fb9 100644
--- a/core/tiny_region_id.h
+++ b/core/tiny_region_id.h
@@ -10,6 +10,8 @@
 #include
 #include
+#include
+#include
 #include
 #include
 #include "hakmem_build_flags.h"
 
@@ -17,6 +19,7 @@
 #include "ptr_track.h"
 #include "hakmem_super_registry.h"
 #include "superslab/superslab_inline.h"
+#include "hakmem_tiny.h" // For TinyTLSSLL type
 
 // Feature flag: Enable header-based class_idx lookup
 #ifndef HAKMEM_TINY_HEADER_CLASSIDX
@@ -42,6 +45,122 @@
 #define HEADER_MAGIC 0xA0
 #define HEADER_CLASS_MASK 0x0F
 
+// ========== Address Watcher (Debug Only) ==========
+
+#if !HAKMEM_BUILD_RELEASE
+// Helper: Get current thread ID (watcher-local version to avoid redefinition)
+static inline uint32_t watcher_self_u32(void) {
+    return (uint32_t)(uintptr_t)pthread_self();
+}
+
+// Address watcher: Tracks when a specific address is allocated or freed
+// Usage: HAKMEM_WATCH_ADDR=0x7f1234567890 ./program
+static inline uintptr_t get_watch_addr(void) {
+    static uintptr_t watch_addr = 0;
+    static int initialized = 0;
+
+    if (!initialized) {
+        const char* env = getenv("HAKMEM_WATCH_ADDR");
+        if (env && *env) {
+            // Parse hex address (with or without 0x prefix)
+            if (env[0] == '0' && (env[1] == 'x' || env[1] == 'X')) {
+                watch_addr = (uintptr_t)strtoull(env + 2, NULL, 16);
+            } else {
+                watch_addr = (uintptr_t)strtoull(env, NULL, 16);
+            }
+            if (watch_addr != 0) {
+                fprintf(stderr, "[WATCH_INIT] Watching address: %p\n", (void*)watch_addr);
+                fflush(stderr);
+            }
+        }
+        initialized = 1;
+    }
+
+    return watch_addr;
+}
+
+// Allocation source tracking
+typedef enum {
+    ALLOC_SOURCE_UNKNOWN = 0,
+    ALLOC_SOURCE_TLS_SLL,   // TLS freelist pop
+    ALLOC_SOURCE_FREELIST,  // Slab freelist pop
+    ALLOC_SOURCE_CARVE,     // Linear carve from slab
+    ALLOC_SOURCE_NEW_SLAB,  // Newly allocated slab
+} AllocSource;
+
+static __thread AllocSource g_last_alloc_source = ALLOC_SOURCE_UNKNOWN;
+
+// Use int to match extern declarations in other files
+static inline void set_alloc_source(int source) {
+    g_last_alloc_source = (AllocSource)source;
+}
+
+static inline const char* alloc_source_name(AllocSource source) {
+    switch (source) {
+        case ALLOC_SOURCE_TLS_SLL:  return "TLS_SLL";
+        case ALLOC_SOURCE_FREELIST: return "FREELIST";
+        case ALLOC_SOURCE_CARVE:    return "CARVE";
+        case ALLOC_SOURCE_NEW_SLAB: return "NEW_SLAB";
+        default:                    return "UNKNOWN";
+    }
+}
+
+// Watch trigger: Called when watch address is allocated
+static inline void watch_alloc_trigger(void* base, int class_idx, AllocSource source) {
+    extern __thread TinyTLSSLL g_tls_sll[];
+    extern _Atomic uint64_t g_debug_op_count;
+
+    uint64_t op = atomic_load(&g_debug_op_count);
+    uint32_t tls_count = g_tls_sll[class_idx].count;
+    void* freelist_head = g_tls_sll[class_idx].head;
+
+    fprintf(stderr, "\n");
+    fprintf(stderr, "========================================\n");
+    fprintf(stderr, "[WATCH_ALLOC_HIT] Address %p allocated!\n", base);
+    fprintf(stderr, "========================================\n");
+    fprintf(stderr, "  Operation: #%lu\n", (unsigned long)op);
+    fprintf(stderr, "  Class:     %d (%zu bytes)\n", class_idx, tiny_stride_for_class(class_idx));
+    fprintf(stderr, "  Source:    %s\n", alloc_source_name(source));
+    fprintf(stderr, "  TLS count: %u\n", tls_count);
+    fprintf(stderr, "  TLS head:  %p\n", freelist_head);
+    fprintf(stderr, "  Thread:    %u\n", (unsigned)watcher_self_u32());
+
+    // Try to get slab metadata if available
+    struct SuperSlab* ss = hak_super_lookup(base);
+    if (ss && ss->magic == SUPERSLAB_MAGIC) {
+        int slab_idx = slab_index_for(ss, base);
+        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
+            TinySlabMeta* meta = &ss->slabs[slab_idx];
+            fprintf(stderr, "  Slab metadata:\n");
+            fprintf(stderr, "    SuperSlab:  %p\n", (void*)ss);
+            fprintf(stderr, "    Slab index: %d\n", slab_idx);
+            fprintf(stderr, "    Slab class: %u\n", (unsigned)meta->class_idx);
+            fprintf(stderr, "    Used:       %u\n", (unsigned)meta->used);
+            fprintf(stderr, "    Capacity:   %u\n", (unsigned)meta->capacity);
+            fprintf(stderr, "    Freelist:   %p\n", meta->freelist);
+            fprintf(stderr, "    Owner TID:  %u\n", (unsigned)meta->owner_tid_low);
+        }
+    }
+
+    fprintf(stderr, "========================================\n");
+    fprintf(stderr, "\n");
+    fflush(stderr);
+
+    // Print backtrace for debugging
+    void* bt[16];
+    int frames = backtrace(bt, 16);
+    fprintf(stderr, "[WATCH_BACKTRACE] %d frames:\n", frames);
+    backtrace_symbols_fd(bt, frames, fileno(stderr));
+    fprintf(stderr, "\n");
+    fflush(stderr);
+
+    // Abort to capture the exact moment
+    fprintf(stderr, "[WATCH_ABORT] Aborting to preserve state...\n");
+    fflush(stderr);
+    abort();
+}
+#endif // !HAKMEM_BUILD_RELEASE
+
 // ========== Write Header (Allocation) ==========
 
 // Write class_idx to header (called after allocation)
@@ -50,6 +169,14 @@ static inline void* tiny_region_id_write_header(void* base, int class_idx) {
     if (!base) return base;
 
+#if !HAKMEM_BUILD_RELEASE
+    // Address watcher: Check if this is the watched address
+    uintptr_t watch = get_watch_addr();
+    if (watch != 0 && (uintptr_t)base == watch) {
+        watch_alloc_trigger(base, class_idx, g_last_alloc_source);
+    }
+#endif
+
     // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header (no exceptions)
     // Rationale: Unified box structure enables:
     //   - O(1) class identification (no registry lookup)
@@ -105,6 +232,23 @@
     PTR_TRACK_HEADER_WRITE(base, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
     void* user = header_ptr + 1; // skip header for user pointer
     PTR_TRACK_MALLOC(base, 0, class_idx); // Track at BASE (where header is)
+
+    // ========== ALLOCATION LOGGING (Debug builds only) ==========
+    // NOTE: This logging is ALWAYS active (not guarded by HAKMEM_BUILD_RELEASE)
+    // because we need to track allocations even in optimized debug builds
+    {
+        extern _Atomic uint64_t g_debug_op_count;
+        extern __thread TinyTLSSLL g_tls_sll[];
+        uint64_t op = atomic_fetch_add(&g_debug_op_count, 1);
+        if (op < 2000) { // ALL classes for comprehensive tracing
+            fprintf(stderr, "[OP#%04lu ALLOC] cls=%d ptr=%p base=%p from=write_header tls_count=%u\n",
+                    (unsigned long)op, class_idx, user, base,
+                    g_tls_sll[class_idx].count);
+            fflush(stderr);
+        }
+    }
+    // ========== END ALLOCATION LOGGING ==========
+
     // Optional guard: log stride/base/user for targeted class
     extern int tiny_guard_is_enabled(void);
     extern void tiny_guard_on_alloc(int cls, void* base, void* user, size_t stride);
diff --git a/core/tiny_superslab_free.inc.h b/core/tiny_superslab_free.inc.h
index 19566a79..4ad6ba46 100644
--- a/core/tiny_superslab_free.inc.h
+++ b/core/tiny_superslab_free.inc.h
@@ -149,7 +149,7 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
     // Phase 6.23: Same-thread check (Phase 12: owner_tid_low)
     uint32_t my_tid = tiny_self_u32();
-    uint8_t my_tid_low = (uint8_t)my_tid;
+    uint8_t my_tid_low = (uint8_t)((my_tid >> 8) & 0xFFu);
     const int debug_guard = g_debug_remote_guard;
     static __thread int g_debug_free_count = 0;
 
     // If owner is not set yet, claim ownership (low 8 bits) to avoid spurious remote path in 1T
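
/*
 * Reviewer note (not part of the patch): a minimal standalone sketch of the
 * owner-tid narrowing introduced above. It assumes, as the patch comments
 * state, that glibc pthread_self() values are 256-byte aligned, so the low
 * 8 bits of the truncated thread id are always zero and bits 8-15 carry the
 * distinguishing byte. The helper names below (tid_u32, tid_owner_byte) are
 * hypothetical, used only for illustration; compile with -pthread.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static inline uint32_t tid_u32(void) {
    /* Same truncation as tiny_self_u32_local() in the patch. */
    return (uint32_t)(uintptr_t)pthread_self();
}

static inline uint8_t tid_owner_byte(uint32_t tid) {
    /* Bits 8-15, matching the new owner_tid_low encoding. */
    return (uint8_t)((tid >> 8) & 0xFFu);
}

int main(void) {
    uint32_t tid = tid_u32();
    /* On glibc the low byte is typically 0x00 for every thread, so the old
     * (tid & 0xFF) owner check could not tell threads apart; the shifted
     * byte normally differs between threads. */
    printf("tid=0x%08x low_byte=0x%02x owner_byte=0x%02x\n",
           (unsigned)tid, (unsigned)(tid & 0xFFu), (unsigned)tid_owner_byte(tid));
    return 0;
}

/*
 * Runtime behavior in the patch is opt-in: set HAKMEM_TINY_LARSON_FIX=1 to
 * enable the cross-thread free check, and optionally HAKMEM_WATCH_ADDR=0x...
 * to abort with a backtrace when a specific base address is allocated or
 * freed (debug builds only).
 */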