static inline uint16_t hak_thread_id16(void) { // best-effort compress cached thread id to 16 bits uint32_t tid = tiny_self_u32(); return (uint16_t)(tid ^ (tid >> 16)); } static inline void eventq_push_ex(int class_idx, uint32_t size, uint8_t tier, uint8_t flags, uint32_t site_id, uint16_t lat_bucket) { (void)flags; (void)lat_bucket; (void)site_id; if (!g_int_engine) return; // Lightweight sampling: if mask set, log 1 out of 2^N unsigned m = g_int_sample_mask; if (m != 0) { unsigned x = g_tls_ev_seq++; if ((x & m) != 0) return; } uint32_t t = atomic_fetch_add_explicit(&g_ev_tail, 1u, memory_order_relaxed); AllocEvent ev; ev.ts_ns = g_int_event_ts ? hak_now_ns() : 0; ev.size = size; ev.site_id = 0; // keep minimal ev.latency_bucket = 0; ev.tier_hit = tier; ev.flags = 0; ev.class_idx = (uint16_t)class_idx; ev.thread_id = 0; g_ev_ring[t & EVENTQ_MASK] = ev; // best-effort overwrite on overflow }