#ifndef HAKMEM_TINY_TLS_OPS_H
#define HAKMEM_TINY_TLS_OPS_H

#include "hakmem_tiny.h"
#include "hakmem_tiny_superslab.h"
#include "hakmem_super_registry.h"
#include "tiny_remote.h"
#include "box/tiny_next_ptr_box.h"
#include <stdint.h>

// Forward declarations for external dependencies
extern int g_use_superslab;
extern const size_t g_tiny_class_sizes[TINY_NUM_CLASSES];
extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
extern __thread void* g_fast_head[TINY_NUM_CLASSES];
extern __thread uint16_t g_fast_count[TINY_NUM_CLASSES];
extern __thread TinyTLSList g_tls_lists[TINY_NUM_CLASSES];
extern __thread TinySlab* g_tls_active_slab_a[TINY_NUM_CLASSES];
extern __thread TinySlab* g_tls_active_slab_b[TINY_NUM_CLASSES];
extern PaddedLock g_tiny_class_locks[TINY_NUM_CLASSES];

#if !HAKMEM_BUILD_RELEASE
extern __thread TinyTLSMag g_tls_mags[TINY_NUM_CLASSES];
void tiny_small_mags_init_once(void);
void tiny_mag_init_if_needed(int class_idx);
#endif

#if HAKMEM_BUILD_DEBUG
extern uint64_t g_tls_spill_ss_count[TINY_NUM_CLASSES];
extern uint64_t g_tls_spill_owner_count[TINY_NUM_CLASSES];
extern uint64_t g_tls_spill_mag_count[TINY_NUM_CLASSES];
extern uint64_t g_tls_spill_requeue_count[TINY_NUM_CLASSES];
#endif

// NOTE: Helper functions are defined in hakmem_tiny.c before this header is
// included, so no forward declarations are needed here.

// ============================================================================
// TLS Operations - Hot Path Functions (Inline for Performance)
// ============================================================================

// Refill TLS list from TLS-bound SuperSlab (100-line hot path)
static inline int tls_refill_from_tls_slab(int class_idx, TinyTLSList* tls, uint32_t want) {
    if (!g_use_superslab || tls == NULL) return 0;
    TinyTLSSlab* tls_slab = &g_tls_slabs[class_idx];
    if (!tls_slab->ss) {
        if (superslab_refill(class_idx) == NULL) return 0;
    }
    TinySlabMeta* meta = tls_slab->meta;
    if (!meta) return 0;
    uint32_t cap = tls_list_spill_threshold(tls);
    if (tls->count >= cap) return 0;
    uint32_t room = cap - tls->count;
    if (want == 0u || want > room) want = room;
    if (want == 0u) return 0;

    // Use stride (class_size + header for C0-6, headerless for C7)
    size_t block_stride = tiny_stride_for_class(class_idx);

    // Header-aware TLS list next offset for chains we build here
#if HAKMEM_TINY_HEADER_CLASSIDX
    // Phase E1-CORRECT: ALL classes have 1-byte header, next ptr at offset 1
    const size_t next_off_tls = 1;
#else
    const size_t next_off_tls = 0;
#endif

    void* accum_head = NULL;
    void* accum_tail = NULL;
    uint32_t total = 0u;
    uint8_t* slab_base = tls_slab->slab_base
        ? tls_slab->slab_base
        : (tls_slab->ss ? tiny_slab_base_for(tls_slab->ss, tls_slab->slab_idx) : NULL);

    while (total < want) {
        // 1) Reuse blocks from the slab's freelist
        if (meta->freelist) {
            void* local_head = NULL;
            void* local_tail = NULL;
            uint32_t local = 0u;
            uint32_t need = want - total;
            while (local < need && meta->freelist) {
                void* node = meta->freelist;
                // BUG FIX: Use Box API to read next pointer at correct offset
                meta->freelist = tiny_next_read(class_idx, node); // freelist is base-linked
                tiny_next_write(class_idx, node, local_head);
                local_head = node;
                if (!local_tail) local_tail = node;
                local++;
            }
            if (local > 0u) {
                // Do not adjust active here (blocks not yet returned to user)
                meta->used += local;
                if (!accum_head) {
                    accum_head = local_head;
                    accum_tail = local_tail;
                } else {
                    tiny_next_write(class_idx, local_tail, accum_head);
                    accum_head = local_head;
                }
                total += local;
                continue;
            }
        }

        // 2) Carve a batch of fresh blocks out of the SuperSlab's linear region
        if (meta->used >= meta->capacity) {
            if (superslab_refill(class_idx) == NULL) break;
            meta = tls_slab->meta;
            if (!meta) break;
            // Refresh stride/base after refill
            block_stride = tiny_stride_for_class(class_idx);
            slab_base = tls_slab->slab_base
                ? tls_slab->slab_base
                : (tls_slab->ss ? tiny_slab_base_for(tls_slab->ss, tls_slab->slab_idx) : NULL);
            continue;
        }
        uint32_t need = want - total;
        uint32_t available = meta->capacity - meta->used;
        if (available == 0u) continue;
        if (need > available) need = available;
        if (!slab_base) {
            slab_base = tiny_slab_base_for(tls_slab->ss, tls_slab->slab_idx);
        }
        uint8_t* base_cursor = slab_base + ((size_t)meta->used * block_stride);
        void* local_head = (void*)base_cursor;
        uint8_t* cursor = base_cursor;
        for (uint32_t i = 1; i < need; ++i) {
            uint8_t* next = cursor + block_stride;
            tiny_next_write(class_idx, (void*)cursor, (void*)next);
            cursor = next;
        }
        void* local_tail = (void*)cursor;
        meta->used += need;
        // Do not adjust active here (blocks not yet returned to user)
        if (!accum_head) {
            accum_head = local_head;
            accum_tail = local_tail;
        } else {
            tiny_next_write(class_idx, local_tail, accum_head);
            accum_head = local_head;
        }
        total += need;
    }
    if (total > 0u && accum_head) {
        tls_list_bulk_put(tls, accum_head, accum_tail, total, class_idx);
        return (int)total;
    }
    return 0;
}
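// ----------------------------------------------------------------------------
// Illustrative sketch (compiled out, not part of the API): how an allocation
// fast path might drive tls_refill_from_tls_slab(). It only uses names that
// appear in this header; the single-node use of tls_list_bulk_take() and the
// retry-once shape are assumptions for illustration, not the real fast path.
#if 0
static inline void* tiny_alloc_fast_sketch(int class_idx) {
    TinyTLSList* tls = &g_tls_lists[class_idx];
    void* head = NULL;
    void* tail = NULL;
    // Try to take one block from the TLS list.
    if (tls_list_bulk_take(tls, 1u, &head, &tail, class_idx) == 0u) {
        // Miss: pull a batch from the TLS-bound SuperSlab. want == 0 means
        // "fill up to the spill threshold" (see the clamp at the top of
        // tls_refill_from_tls_slab), then retry the take exactly once.
        if (tls_refill_from_tls_slab(class_idx, tls, 0u) == 0) return NULL;
        if (tls_list_bulk_take(tls, 1u, &head, &tail, class_idx) == 0u) return NULL;
    }
    return head;
}
#endif
// ----------------------------------------------------------------------------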
// Spill excess TLS list back to owners (96-line hot path)
static inline void tls_list_spill_excess(int class_idx, TinyTLSList* tls) {
    uint32_t cap = tls_list_spill_threshold(tls);
    if (tls->count <= cap) return;
    uint32_t excess = tls->count - cap;
    void* head = NULL;
    void* tail = NULL;
    uint32_t taken = tls_list_bulk_take(tls, excess, &head, &tail, class_idx);
    if (taken == 0u || head == NULL) return;
#if HAKMEM_PROF_STATIC && HAKMEM_BUILD_DEBUG
    struct timespec ts_tls;
    int prof_sample = hkm_prof_begin(&ts_tls);
#endif
#if !HAKMEM_BUILD_RELEASE
    tiny_small_mags_init_once();
    if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
    TinyTLSMag* mag = &g_tls_mags[class_idx];
#else
    TinyTLSMag* mag = NULL;
    (void)mag;
#endif
    void* requeue_head = NULL;
    void* requeue_tail = NULL;
    uint32_t requeue_count = 0;
    uint32_t self_tid = tiny_self_u32();
    void* node = head;
    while (node) {
        void* next = tiny_next_read(class_idx, node);
        int handled = 0;
        // Phase 1: Try SuperSlab first (registry-based lookup, no false positives)
        SuperSlab* ss = hak_super_lookup(node);
        if (ss && ss->magic == SUPERSLAB_MAGIC) {
            int slab_idx = slab_index_for(ss, node);
            TinySlabMeta* meta = &ss->slabs[slab_idx];
            if (!tiny_remote_guard_allow_local_push(ss, slab_idx, meta, node, "tls_spill_ss", self_tid)) {
                (void)ss_remote_push(ss, slab_idx, node);
                if (meta->used > 0) meta->used--;
                handled = 1;
            } else {
                void* prev = meta->freelist;
                // BUG FIX: Use Box API to write next pointer at correct offset
                tiny_next_write(class_idx, node, prev); // freelist within slab uses base link
                meta->freelist = node;
                tiny_failfast_log("tls_spill_ss", ss->size_class, ss, meta, node, prev);
                if (meta->used > 0) meta->used--;
                // Active was decremented at free time
                handled = 1;
            }
#if HAKMEM_BUILD_DEBUG
            g_tls_spill_ss_count[class_idx]++;
#endif
            tiny_obs_record(3, class_idx); // TINY_OBS_SPILL_SS
        }
        // Phase 2: Fall back to the owning slab's mini-magazine
        if (!handled) {
            TinySlab* owner = tls_active_owner_for_ptr(class_idx, node);
            if (owner && !mini_mag_is_full(&owner->mini_mag)) {
                mini_mag_push(&owner->mini_mag, node);
                handled = 1;
#if HAKMEM_BUILD_DEBUG
                g_tls_spill_owner_count[class_idx]++;
#endif
                tiny_obs_record(4, class_idx); // TINY_OBS_SPILL_OWNER
            }
        }
#if !HAKMEM_BUILD_RELEASE
        // Phase 3: Fall back to the TLS magazine (debug builds only)
        if (!handled) {
            if (mag && mag->cap > 0 && mag->top < mag->cap) {
                mag->items[mag->top].ptr = node;
#if HAKMEM_TINY_MAG_OWNER
                TinySlab* owner_hint = tls_active_owner_for_ptr(class_idx, node);
                mag->items[mag->top].owner = owner_hint;
#endif
                mag->top++;
                if (__builtin_expect(tiny_remote_watch_is(node), 0)) {
                    SuperSlab* watch_ss = hak_super_lookup(node);
                    int watch_idx = (watch_ss && watch_ss->magic == SUPERSLAB_MAGIC)
                        ? slab_index_for(watch_ss, node) : -1;
                    tiny_remote_watch_note("mag_push", watch_ss, watch_idx, node, 0xA242u, 0, 0);
                }
                handled = 1;
#if HAKMEM_BUILD_DEBUG
                g_tls_spill_mag_count[class_idx]++;
#endif
                tiny_obs_record(5, class_idx); // TINY_OBS_SPILL_MAG
            }
        }
#endif
        // Phase 4: Nothing accepted the block; requeue it on the TLS list
        if (!handled) {
            tiny_next_write(class_idx, node, requeue_head);
            if (!requeue_head) requeue_tail = node;
            requeue_head = node;
            requeue_count++;
#if HAKMEM_BUILD_DEBUG
            g_tls_spill_requeue_count[class_idx]++;
#endif
            tiny_obs_record(6, class_idx); // TINY_OBS_SPILL_REQUEUE
        }
        node = next;
    }
    if (requeue_head) {
        tls_list_bulk_put(tls, requeue_head, requeue_tail, requeue_count, class_idx);
    }
#if HAKMEM_PROF_STATIC && HAKMEM_BUILD_DEBUG
    hkm_prof_end(prof_sample, HKP_TINY_TLS_SPILL, &ts_tls);
#endif
}
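// ----------------------------------------------------------------------------
// Illustrative sketch (compiled out, not part of the API): how a free path
// might pair a TLS list push with tls_list_spill_excess(). All names come from
// this header; pushing a single node via tls_list_bulk_put(), and whether the
// caller must NULL-terminate the chain first, are assumptions for illustration.
#if 0
static inline void tiny_free_fast_sketch(int class_idx, void* p) {
    TinyTLSList* tls = &g_tls_lists[class_idx];
    // Push a one-node chain (head == tail, count == 1) onto the TLS list.
    tiny_next_write(class_idx, p, NULL);
    tls_list_bulk_put(tls, p, p, 1u, class_idx);
    // Spill only once the list has grown past its threshold; the call is
    // cheap regardless because tls_list_spill_excess() re-checks the count.
    if (tls->count > tls_list_spill_threshold(tls)) {
        tls_list_spill_excess(class_idx, tls);
    }
}
#endif
// ----------------------------------------------------------------------------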
// ============================================================================
// TLS Operations - Cold Path Functions (Implemented in .c)
// ============================================================================

// Drain all TLS caches back to global pool (cold path, 89 lines)
void tiny_tls_cache_drain(int class_idx);

#endif // HAKMEM_TINY_TLS_OPS_H
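
// ----------------------------------------------------------------------------
// Illustrative sketch (compiled out, not part of the API): draining every
// class's TLS cache at thread exit, e.g. from a pthread_key destructor. The
// destructor wiring is an assumption; only tiny_tls_cache_drain() and
// TINY_NUM_CLASSES come from this header.
#if 0
#include <pthread.h>

static void tiny_tls_destructor_sketch(void* unused) {
    (void)unused;
    for (int c = 0; c < TINY_NUM_CLASSES; ++c) {
        tiny_tls_cache_drain(c); // return cached blocks to the global pool
    }
}
#endif
// ----------------------------------------------------------------------------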