// tiny_mailbox.c - Publish Mailbox
#include <stdatomic.h>
#include <stdint.h>
#include <pthread.h>
#include "hakmem_tiny.h"
#include "tiny_debug_ring.h"
#include <stdio.h>
#include "tiny_mailbox.h"
#include <stdlib.h>

#ifndef MAILBOX_SHARDS
#define MAILBOX_SHARDS 64
#endif

// Shared state (per class)
static _Atomic(uintptr_t) g_pub_mailbox_entries[TINY_NUM_CLASSES][MAILBOX_SHARDS];
static _Atomic(uint32_t) g_pub_mailbox_claimed[TINY_NUM_CLASSES][MAILBOX_SHARDS];
static _Atomic(uint32_t) g_pub_mailbox_rr[TINY_NUM_CLASSES];
static _Atomic(uint32_t) g_pub_mailbox_used[TINY_NUM_CLASSES];
static _Atomic(uint32_t) g_pub_mailbox_scan[TINY_NUM_CLASSES];
static __thread uint8_t g_tls_mailbox_registered[TINY_NUM_CLASSES];
static __thread uint8_t g_tls_mailbox_slot[TINY_NUM_CLASSES];

static int g_mailbox_trace_en = -1;
static int g_mailbox_trace_limit = 4;
static _Atomic int g_mailbox_trace_seen[TINY_NUM_CLASSES];

// Optional: periodic slow discovery to widen 'used' even when >0 (A/B)
static int g_mailbox_slowdisc_en = -1;     // env: HAKMEM_TINY_MAILBOX_SLOWDISC (default ON)
static int g_mailbox_slowdisc_period = -1; // env: HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD (unset: 128; non-positive: 256)
static __thread uint32_t g_mailbox_fetch_tick[TINY_NUM_CLASSES];

// Thread-exit hook to release claimed slots
static pthread_once_t g_mailbox_tls_once = PTHREAD_ONCE_INIT;
static pthread_key_t g_mailbox_tls_key;

static void tiny_mailbox_unregister_class(int class_idx);

static void tiny_mailbox_tls_cleanup(void* key) {
    (void)key;
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tls_mailbox_registered[i]) {
            tiny_mailbox_unregister_class(i);
        }
    }
}

static void tiny_mailbox_tls_init(void) {
    (void)pthread_key_create(&g_mailbox_tls_key, tiny_mailbox_tls_cleanup);
}

// Counters (extern from main module)
extern unsigned long long g_pub_mail_hits[];
extern unsigned long long g_rf_hit_mail[];
extern unsigned long long g_mailbox_register_calls[];
extern unsigned long long g_mailbox_slow_discoveries[];
// (bench mode is handled outside; mailbox is agnostic)
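// Illustrative usage sketch (not compiled): how a producer and a consumer
// are expected to meet through the per-class mailbox. The surrounding
// allocator code that owns 'class_idx', 'ss', and 'slab_idx' is assumed,
// not shown here.
#if 0
// Producer thread: hand off a slab that has free blocks.
tiny_mailbox_publish(class_idx, ss, slab_idx);

// Consumer thread: poll the mailbox before taking a slower refill path.
uintptr_t ent = tiny_mailbox_fetch(class_idx);
if (ent) {
    SuperSlab* got_ss = (SuperSlab*)(ent & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
    int got_slab = (int)(ent & 0x3Fu);
    // ... adopt (got_ss, got_slab) into the local allocation path ...
}
#endif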
// Register a publisher slot for the calling thread (per class, cached in TLS).
void tiny_mailbox_register(int class_idx) {
    if (g_tls_mailbox_registered[class_idx]) return;
    g_mailbox_register_calls[class_idx]++;
    // One-shot visibility trace (env: HAKMEM_TINY_RF_TRACE)
    static int trace_en = -1;
    if (__builtin_expect(trace_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_RF_TRACE");
        trace_en = (e && atoi(e) != 0) ? 1 : 0;
    }
    pthread_once(&g_mailbox_tls_once, tiny_mailbox_tls_init);
    pthread_setspecific(g_mailbox_tls_key, (void*)1);
    // Round-robin probe for an unclaimed shard.
    uint32_t chosen = MAILBOX_SHARDS;
    for (int attempt = 0; attempt < MAILBOX_SHARDS; attempt++) {
        uint32_t idx = atomic_fetch_add_explicit(&g_pub_mailbox_rr[class_idx], 1u, memory_order_relaxed);
        idx &= (MAILBOX_SHARDS - 1u);
        uint32_t expected_claim = 0;
        if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_claimed[class_idx][idx],
                                                  &expected_claim, 1u,
                                                  memory_order_release, memory_order_relaxed)) {
            chosen = idx;
            break;
        }
    }
    if (chosen == MAILBOX_SHARDS) {
        // All shards claimed: fall back to sharing slot 0.
        atomic_store_explicit(&g_pub_mailbox_claimed[class_idx][0], 1u, memory_order_release);
        chosen = 0;
    }
    g_tls_mailbox_slot[class_idx] = (uint8_t)chosen;
    g_tls_mailbox_registered[class_idx] = 1;
    atomic_store_explicit(&g_pub_mailbox_entries[class_idx][chosen], (uintptr_t)0, memory_order_release);
    // Monotonic raise of 'used' to cover the chosen index.
    uint32_t target = chosen + 1u;
    while (1) {
        uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
        if (used >= target) break;
        if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &used, target,
                                                  memory_order_acq_rel, memory_order_relaxed)) {
            break;
        }
    }
    if (trace_en) {
        static _Atomic int printed[8]; // one-shot per class; assumes TINY_NUM_CLASSES <= 8
        int expected = 0;
        if (atomic_compare_exchange_strong(&printed[class_idx], &expected, 1)) {
            fprintf(stderr, "[MBTRACE] register class=%d slot=%u used=%u\n",
                    class_idx, (unsigned)chosen,
                    (unsigned)atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_relaxed));
        }
    }
}

void tiny_mailbox_publish(int class_idx, SuperSlab* ss, int slab_idx) {
    tiny_mailbox_register(class_idx);
    // Encode entry locally (SuperSlab is aligned to >=1MB; the low 6 bits carry slab_idx).
    uintptr_t ent = ((uintptr_t)ss) | ((uintptr_t)slab_idx & 0x3Fu);
    uint32_t slot = g_tls_mailbox_slot[class_idx];
    tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_PUBLISH, (uint16_t)class_idx, ss,
                           ((uintptr_t)slot << 32) | (uint32_t)(slab_idx & 0x3Fu));
    atomic_store_explicit(&g_pub_mailbox_entries[class_idx][slot], ent, memory_order_release);
}

static void tiny_mailbox_unregister_class(int class_idx) {
    if (!g_tls_mailbox_registered[class_idx]) return;
    uint32_t slot = g_tls_mailbox_slot[class_idx];
    atomic_store_explicit(&g_pub_mailbox_entries[class_idx][slot], (uintptr_t)0, memory_order_release);
    atomic_store_explicit(&g_pub_mailbox_claimed[class_idx][slot], 0u, memory_order_release);
    g_tls_mailbox_registered[class_idx] = 0;
    g_tls_mailbox_slot[class_idx] = 0;
}
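// Minimal sketch of the entry packing used by publish/fetch above, assuming
// SuperSlab bases are aligned to SUPERSLAB_SIZE_MIN (>= 64), so the low 6
// bits of the pointer are free to carry a slab index in 0..63. The helper
// names below are illustrative, not part of the real API.
#if 0
static inline uintptr_t mb_encode(SuperSlab* ss, int slab_idx) {
    return ((uintptr_t)ss) | ((uintptr_t)slab_idx & 0x3Fu);
}
static inline SuperSlab* mb_decode_ss(uintptr_t ent) {
    // Mask off everything below the SuperSlab alignment to recover the base.
    return (SuperSlab*)(ent & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
}
static inline int mb_decode_slab(uintptr_t ent) {
    return (int)(ent & 0x3Fu);
}
#endif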
uintptr_t tiny_mailbox_fetch(int class_idx) {
    // Lazy env parse for the one-shot publish trace.
    if (__builtin_expect(g_mailbox_trace_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_PUBLISH_TRACE");
        g_mailbox_trace_en = (e && atoi(e) != 0) ? 1 : 0;
        const char* lim = getenv("HAKMEM_TINY_PUBLISH_TRACE_MAX");
        if (lim && *lim) {
            int v = atoi(lim);
            if (v > 0) g_mailbox_trace_limit = v;
        }
    }
    uint32_t used = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
    if (used > MAILBOX_SHARDS) used = MAILBOX_SHARDS;
    // Slow-path discovery for late registration: 'used' can lag behind a
    // publisher that claimed a slot but whose watermark raise is not yet visible.
    if (used == 0) {
        for (uint32_t i = 0; i < MAILBOX_SHARDS; i++) {
            uint32_t claimed = atomic_load_explicit(&g_pub_mailbox_claimed[class_idx][i], memory_order_acquire);
            if (claimed) {
                g_mailbox_slow_discoveries[class_idx]++;
                const char* e = getenv("HAKMEM_TINY_RF_TRACE");
                if (e && atoi(e) != 0) {
                    static _Atomic int printed_slow[8];
                    int expected = 0;
                    if (atomic_compare_exchange_strong(&printed_slow[class_idx], &expected, 1)) {
                        fprintf(stderr, "[MBTRACE] slow-discover class=%d first_slot=%u\n",
                                class_idx, (unsigned)i);
                    }
                }
                uint32_t target = i + 1u;
                while (1) {
                    uint32_t cur = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
                    if (cur >= target) break;
                    if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &cur, target,
                                                              memory_order_acq_rel, memory_order_relaxed)) {
                        break;
                    }
                }
                used = target;
                break;
            }
        }
        if (used == 0) return (uintptr_t)0;
    }
    // Optional periodic discovery: occasionally scan for newly-claimed slots beyond 'used'.
    if (__builtin_expect(g_mailbox_slowdisc_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC");
        g_mailbox_slowdisc_en = (e ? ((atoi(e) != 0) ? 1 : 0) : 1);
    }
    if (__builtin_expect(g_mailbox_slowdisc_period == -1, 0)) {
        const char* p = getenv("HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD");
        int v = (p && *p) ? atoi(p) : 128; // unset: 128; non-positive values fall back to 256
        if (v <= 0) v = 256;
        g_mailbox_slowdisc_period = v;
    }
    if (g_mailbox_slowdisc_en && used < MAILBOX_SHARDS) {
        uint32_t t = ++g_mailbox_fetch_tick[class_idx];
        int period = g_mailbox_slowdisc_period;
        if (period > 0 && (t % (uint32_t)period) == 0u) {
            for (uint32_t i = used; i < MAILBOX_SHARDS; i++) {
                uint32_t claimed = atomic_load_explicit(&g_pub_mailbox_claimed[class_idx][i], memory_order_acquire);
                if (claimed) {
                    uint32_t target = i + 1u;
                    uint32_t cur = atomic_load_explicit(&g_pub_mailbox_used[class_idx], memory_order_acquire);
                    while (cur < target) {
                        if (atomic_compare_exchange_weak_explicit(&g_pub_mailbox_used[class_idx], &cur, target,
                                                                  memory_order_acq_rel, memory_order_relaxed)) {
                            break;
                        }
                    }
                    break;
                }
            }
        }
    }
    // Rotating scan start spreads contention across concurrent fetchers.
    uint32_t start = atomic_fetch_add_explicit(&g_pub_mailbox_scan[class_idx], 1u, memory_order_relaxed);
    start &= (MAILBOX_SHARDS - 1u);
    for (uint32_t n = 0; n < used; n++) {
        uint32_t idx = (start + n) & (MAILBOX_SHARDS - 1u);
        uint32_t claimed = atomic_load_explicit(&g_pub_mailbox_claimed[class_idx][idx], memory_order_acquire);
        if (!claimed) continue;
        _Atomic(uintptr_t)* mailbox = &g_pub_mailbox_entries[class_idx][idx];
        // Exchange-with-zero takes the entry exactly once even under races.
        uintptr_t ent = atomic_exchange_explicit(mailbox, (uintptr_t)0, memory_order_acq_rel);
        if (ent) {
            g_pub_mail_hits[class_idx]++;
            g_rf_hit_mail[class_idx]++;
            SuperSlab* ss = (SuperSlab*)(ent & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
            int slab = (int)(ent & 0x3Fu);
            tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_FETCH, (uint16_t)class_idx, ss,
                                   ((uintptr_t)idx << 32) | (uint32_t)(slab & 0x3Fu));
            if (g_mailbox_trace_en) {
                int limit = g_mailbox_trace_limit;
                if (limit <= 0) limit = 4;
                int seen = atomic_load_explicit(&g_mailbox_trace_seen[class_idx], memory_order_relaxed);
                while (seen < limit) {
                    if (atomic_compare_exchange_weak_explicit(&g_mailbox_trace_seen[class_idx], &seen, seen + 1,
                                                              memory_order_acq_rel, memory_order_relaxed)) {
                        fprintf(stderr, "[MBTRACE+] class=%d ss=%p slab=%d\n", class_idx, (void*)ss, slab);
                        break;
                    }
                    seen = atomic_load_explicit(&g_mailbox_trace_seen[class_idx], memory_order_relaxed);
                }
            }
            const char* e = getenv("HAKMEM_TINY_RF_TRACE");
            if (e && atoi(e) != 0) {
                static _Atomic int printed_hit[8];
                int expected = 0;
                if (atomic_compare_exchange_strong(&printed_hit[class_idx], &expected, 1)) {
                    fprintf(stderr, "[MBTRACE] fetch-hit class=%d ss=%p slab=%d\n",
                            class_idx, (void*)ss, slab);
                }
            }
            return ent;
        }
    }
    tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_FETCH_NULL, (uint16_t)class_idx, NULL, (uintptr_t)used);
    return (uintptr_t)0;
}
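// Sketch of a bounded polling wrapper a caller might layer on top of
// tiny_mailbox_fetch(); the name and retry policy are hypothetical. The
// exchange-with-zero inside fetch already guarantees each published entry
// is consumed at most once, so retrying is safe.
#if 0
static inline uintptr_t mb_fetch_retry(int class_idx, int max_tries) {
    for (int i = 0; i < max_tries; i++) {
        uintptr_t ent = tiny_mailbox_fetch(class_idx);
        if (ent) return ent;
    }
    return (uintptr_t)0; // caller falls back, e.g. allocates a fresh slab
}
#endif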
slab=%d\n", class_idx, (void*)ss, slab); break; } seen = atomic_load_explicit(&g_mailbox_trace_seen[class_idx], memory_order_relaxed); } } const char* e = getenv("HAKMEM_TINY_RF_TRACE"); if (e && atoi(e) != 0) { static _Atomic int printed_hit[8]; int expected = 0; if (atomic_compare_exchange_strong(&printed_hit[class_idx], &expected, 1)) { fprintf(stderr, "[MBTRACE] fetch-hit class=%d ss=%p slab=%d\n", class_idx, (void*)ss, slab); } } return ent; } } tiny_debug_ring_record(TINY_RING_EVENT_MAILBOX_FETCH_NULL, (uint16_t)class_idx, NULL, (uintptr_t)used); return (uintptr_t)0; }