#ifndef HAKMEM_TINY_REFILL_P0_INC_H
#define HAKMEM_TINY_REFILL_P0_INC_H

#include <stdint.h>
#include <stdatomic.h>
#include <stdio.h>

// hakmem_tiny_refill_p0.inc.h
// P0: Batch refill implementation (sll_refill_batch_from_ss only).
// Phase 12: DO NOT alias or redefine sll_refill_small_from_ss here.
// NOTE: This file is active only when HAKMEM_TINY_P0_BATCH_REFILL=1.

#if HAKMEM_TINY_P0_BATCH_REFILL

#include "hakmem_tiny_integrity.h"
#include "tiny_box_geometry.h"          // Box 3: Geometry & Capacity Calculator
#include "tiny_refill_opt.h"
#include "tiny_fc_api.h"
#include "superslab/superslab_inline.h" // For _ss_remote_drain_to_freelist_unsafe()
#include "box/integrity_box.h"          // Box I: Integrity verification (Priority ALPHA)
#include "box/tiny_next_ptr_box.h"      // Box API: Next pointer read/write

// Debug counters (compile-time gated)
#if HAKMEM_DEBUG_COUNTERS
extern unsigned long long g_rf_hit_slab[];
extern unsigned long long g_rf_early_no_ss[];
extern unsigned long long g_rf_early_no_meta[];
extern unsigned long long g_rf_early_no_room[];
extern unsigned long long g_rf_early_want_zero[];
#endif

// P0 diagnostic logging is now permanently disabled (former ENV toggle removed).
static inline int p0_should_log(void) { return 0; }

// P0 batch refill entry point
static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
    // Phase E1-CORRECT: C7 now has headers, can use P0 batch refill
    HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_batch_from_ss");
    if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
        static _Atomic int g_p0_class_oob_log = 0;
        if (atomic_fetch_add_explicit(&g_p0_class_oob_log, 1, memory_order_relaxed) == 0) {
            fprintf(stderr, "[P0_CLASS_OOB] class_idx=%d max_take=%d\n", class_idx, max_take);
        }
        return 0;
    }

    if (!g_use_superslab || max_take <= 0) {
#if HAKMEM_DEBUG_COUNTERS
        if (!g_use_superslab) g_rf_early_no_ss[class_idx]++;
#endif
        return 0;
    }

    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    // Phase 3c L1D Opt: Prefetch SuperSlab hot fields early
    if (tls->ss) {
        __builtin_prefetch(&tls->ss->slab_bitmap, 0, 3);
        __builtin_prefetch(&tls->ss->total_active_blocks, 0, 3);
    }

    uint32_t active_before = 0;
    if (tls->ss) {
        active_before = atomic_load_explicit(&tls->ss->total_active_blocks, memory_order_relaxed);
    }

    if (!tls->ss) {
        if (!superslab_refill(class_idx)) {
            return 0;
        }
    }

    TinySlabMeta* meta = tls->meta;
    if (!meta) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_no_meta[class_idx]++;
#endif
        return 0;
    }

    // Phase 3c L1D Opt: Prefetch SlabMeta hot fields (freelist, used, capacity)
    __builtin_prefetch(&meta->freelist, 0, 3);
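
    // Integrity (level >= 4): capture the slab metadata state at entry and
    // validate it at the "P0 refill entry" checkpoint, so corruption that
    // predates this refill is reported before any refill work mutates the slab.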
#if HAKMEM_INTEGRITY_LEVEL >= 4
    uint8_t* initial_slab_base = tls->slab_base ? tls->slab_base
                                                : tiny_slab_base_for(tls->ss, tls->slab_idx);
    SlabMetadataState meta_initial = integrity_capture_slab_metadata(meta, initial_slab_base, class_idx);
    INTEGRITY_CHECK_SLAB_METADATA(meta_initial, "P0 refill entry");
#endif

    // Optional: Direct-FC fast path (A/B, supports all classes).
    // Fixed defaults after ENV cleanup:
    //   - C5 priority: enabled
    //   - C7 only:     disabled
    //   - all classes: disabled
    do {
        const int g_direct_fc = 1;
        const int g_direct_fc_c7 = 0;
        const int g_direct_fc_all = 0;
        if (__builtin_expect(g_direct_fc_all ||
                             (g_direct_fc && class_idx == 5) ||
                             (g_direct_fc_c7 && class_idx == 7), 0)) {
            int room = tiny_fc_room(class_idx);
            if (room <= 0) return 0;

            uint32_t rmt = atomic_load_explicit(&tls->ss->remote_counts[tls->slab_idx],
                                                memory_order_relaxed);
            const int g_drain_th = 64;
            if (rmt >= (uint32_t)g_drain_th) {
                _ss_remote_drain_to_freelist_unsafe(tls->ss, tls->slab_idx, tls->meta);
            }

            void* out[128];
            // Clamp to the local staging buffer so a large FC room cannot overflow out[].
            if (room > (int)(sizeof(out) / sizeof(out[0]))) {
                room = (int)(sizeof(out) / sizeof(out[0]));
            }
            int produced = 0;
            TinySlabMeta* m = tls->meta;
            size_t bs = tiny_stride_for_class(class_idx);
            uint8_t* base = tls->slab_base ? tls->slab_base
                                           : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            while (produced < room) {
                if (m->freelist) {
                    void* p = m->freelist;
                    m->freelist = tiny_next_read(class_idx, p);
                    m->used++;
                    out[produced++] = p;
                } else if (m->carved < m->capacity) {
                    void* p = (void*)(base + ((size_t)m->carved * bs));
                    m->carved++;
                    m->used++;
                    out[produced++] = p;
                } else {
                    if (!superslab_refill(class_idx)) break;
                    tls = &g_tls_slabs[class_idx];
                    m = tls->meta;
                    base = tls->slab_base ? tls->slab_base
                                          : tiny_slab_base_for(tls->ss, tls->slab_idx);
                }
            }
            if (produced > 0) {
                ss_active_add(tls->ss, (uint32_t)produced);
                (void)tiny_fc_push_bulk(class_idx, out, produced);
                return produced;
            }
            // fallthrough to regular path
        }
    } while (0);

    uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP);
    int room = (int)sll_cap - (int)g_tls_sll[class_idx].count;
    if (room <= 0) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_no_room[class_idx]++;
#endif
        return 0;
    }

    uint32_t want = (uint32_t)max_take;
    if (want > (uint32_t)room) want = (uint32_t)room;
    if (want == 0) {
#if HAKMEM_DEBUG_COUNTERS
        g_rf_early_want_zero[class_idx]++;
#endif
        return 0;
    }

    size_t bs = tiny_stride_for_class(class_idx);
    int total_taken = 0;

    while (want > 0) {
        uintptr_t ss_base = 0;
        uintptr_t ss_limit = 0;
        if (tls->ss && tls->slab_idx >= 0) {
            uint8_t* slab_base = tiny_slab_base_for_geometry(tls->ss, tls->slab_idx);
            ss_base = (uintptr_t)slab_base;
            ss_limit = ss_base + tiny_usable_bytes_for_slab(tls->slab_idx);
        }
        if (tls->ss && tls->slab_idx >= 0) {
            uint32_t remote_count = atomic_load_explicit(&tls->ss->remote_counts[tls->slab_idx],
                                                         memory_order_relaxed);
            if (remote_count > 0) {
                _ss_remote_drain_to_freelist_unsafe(tls->ss, tls->slab_idx, meta);
            }
        }

        TinyRefillChain chain;
        uint32_t from_freelist = trc_pop_from_freelist(meta, class_idx, ss_base, ss_limit, bs, want, &chain);
        if (from_freelist > 0) {
            trc_splice_to_sll(class_idx, &chain,
                              &g_tls_sll[class_idx].head, &g_tls_sll[class_idx].count);
            ss_active_add(tls->ss, from_freelist);
            // Phase 1: Atomic increment for MT safety
            atomic_fetch_add_explicit(&meta->used, from_freelist, memory_order_relaxed);

            // Phase 3c L1D Opt: Prefetch next freelist entry after refill
            // Phase 1: Use atomic load for MT safety
            void* next_head = slab_freelist_load_relaxed(meta);
            if (next_head) {
                __builtin_prefetch(next_head, 0, 3);
            }

#if HAKMEM_DEBUG_COUNTERS
            extern unsigned long long g_rf_freelist_items[];
            g_rf_freelist_items[class_idx] += from_freelist;
#endif
            total_taken += from_freelist;
            want -= from_freelist;
            if (want == 0) break;
        }
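
        // Freelist pops did not fully satisfy `want`: fall back to linear
        // carving from the slab's uncarved region, attaching a fresh slab via
        // superslab_refill() once the current one is fully carved.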
        if (meta->carved >= meta->capacity) {
            if (!superslab_refill(class_idx)) break;
            tls = &g_tls_slabs[class_idx];
            meta = tls->meta;
            if (!meta) break;
            continue;
        }

        // NOTE: Pre-carve geometry validation removed (redundant).
        // Stride table is now correct in tiny_block_stride_for_class(),
        // and slab geometry is validated at allocation time by shared_pool.
        // Defense-in-depth validation adds overhead without benefit.

        uint32_t available = meta->capacity - meta->carved;
        uint32_t batch = want;
        if (batch > available) batch = available;
        if (batch == 0) break;

        uint8_t* slab_base = tls->slab_base ? tls->slab_base
                                            : tiny_slab_base_for(tls->ss, tls->slab_idx);
        TinyRefillChain carve;
        trc_linear_carve(slab_base, bs, meta, batch, class_idx, &carve);
        trc_splice_to_sll(class_idx, &carve,
                          &g_tls_sll[class_idx].head, &g_tls_sll[class_idx].count);
        ss_active_add(tls->ss, batch);

#if HAKMEM_DEBUG_COUNTERS
        extern unsigned long long g_rf_carve_items[];
        g_rf_carve_items[class_idx] += batch;
#endif
        total_taken += batch;
        want -= batch;
    }

#if HAKMEM_DEBUG_COUNTERS
    g_rf_hit_slab[class_idx]++;
#endif

    if (tls->ss && p0_should_log()) {
        uint32_t active_after = atomic_load_explicit(&tls->ss->total_active_blocks,
                                                     memory_order_relaxed);
        int32_t delta = (int32_t)active_after - (int32_t)active_before;
        fprintf(stderr, "[P0_COUNTER] cls=%d slab=%d taken=%d active_delta=%d\n",
                class_idx, tls->slab_idx, total_taken, delta);
    }

    return total_taken;
}

#endif  // HAKMEM_TINY_P0_BATCH_REFILL
#endif  // HAKMEM_TINY_REFILL_P0_INC_H