// hakmem_tiny_refill_p0.inc.h // ChatGPT Pro P0: Complete Batch Refill (SLL用) // // Purpose: Optimize sll_refill_small_from_ss with batch carving // Based on: tls_refill_from_tls_slab (hakmem_tiny_tls_ops.h:115-126) // // Key optimization: ss_active_inc × 64 → ss_active_add × 1 // // Maintains: Existing g_tls_sll_head fast path (no changes to hot path!) // // Enable P0 by default for testing (set to 0 to disable) #ifndef HAKMEM_TINY_P0_BATCH_REFILL #define HAKMEM_TINY_P0_BATCH_REFILL 1 #endif #ifndef HAKMEM_TINY_REFILL_P0_INC_H #define HAKMEM_TINY_REFILL_P0_INC_H // Debug counters (compile-time gated) #if HAKMEM_DEBUG_COUNTERS extern unsigned long long g_rf_hit_slab[]; // Diagnostic counters for refill early returns extern unsigned long long g_rf_early_no_ss[]; // Line 27: !g_use_superslab extern unsigned long long g_rf_early_no_meta[]; // Line 35: !meta extern unsigned long long g_rf_early_no_room[]; // Line 40: room <= 0 extern unsigned long long g_rf_early_want_zero[]; // Line 55: want == 0 #endif // Refill TLS SLL from SuperSlab with batch carving (P0 optimization) #include "tiny_refill_opt.h" static inline int sll_refill_batch_from_ss(int class_idx, int max_take) { if (!g_use_superslab || max_take <= 0) { #if HAKMEM_DEBUG_COUNTERS if (!g_use_superslab) g_rf_early_no_ss[class_idx]++; #endif return 0; } TinyTLSSlab* tls = &g_tls_slabs[class_idx]; if (!tls->ss) { // Try to obtain a SuperSlab for this class if (superslab_refill(class_idx) == NULL) return 0; } TinySlabMeta* meta = tls->meta; if (!meta) { #if HAKMEM_DEBUG_COUNTERS g_rf_early_no_meta[class_idx]++; #endif return 0; } // Compute how many we can actually push into SLL without overflow uint32_t sll_cap = sll_cap_for_class(class_idx, (uint32_t)TINY_TLS_MAG_CAP); int room = (int)sll_cap - (int)g_tls_sll_count[class_idx]; if (room <= 0) { #if HAKMEM_DEBUG_COUNTERS g_rf_early_no_room[class_idx]++; #endif return 0; } // For hot tiny classes (0..3), allow an env override to increase batch size uint32_t want = (uint32_t)max_take; if (class_idx <= 3) { static int g_hot_override = -2; // -2 = uninitialized, -1 = no override, >0 = value if (__builtin_expect(g_hot_override == -2, 0)) { const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_HOT"); int v = (e && *e) ? atoi(e) : -1; if (v < 0) v = -1; if (v > 256) v = 256; // clamp g_hot_override = v; } if (g_hot_override > 0) want = (uint32_t)g_hot_override; } else { // Mid classes (>=4): optional override for batch size static int g_mid_override = -2; // -2 = uninitialized, -1 = no override, >0 = value if (__builtin_expect(g_mid_override == -2, 0)) { const char* e = getenv("HAKMEM_TINY_REFILL_COUNT_MID"); int v = (e && *e) ? atoi(e) : -1; if (v < 0) v = -1; if (v > 256) v = 256; // clamp g_mid_override = v; } if (g_mid_override > 0) want = (uint32_t)g_mid_override; } if (want > (uint32_t)room) want = (uint32_t)room; if (want == 0) { #if HAKMEM_DEBUG_COUNTERS g_rf_early_want_zero[class_idx]++; #endif return 0; } size_t bs = g_tiny_class_sizes[class_idx]; int total_taken = 0; // === P0 Batch Carving Loop === while (want > 0) { uintptr_t ss_base = 0; uintptr_t ss_limit = 0; if (tls->ss) { ss_base = (uintptr_t)tls->ss; ss_limit = ss_base + ((size_t)1ULL << tls->ss->lg_size); } // Handle freelist items first (usually 0) TinyRefillChain chain; uint32_t from_freelist = trc_pop_from_freelist( meta, class_idx, ss_base, ss_limit, bs, want, &chain); if (from_freelist > 0) { trc_splice_to_sll(class_idx, &chain, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]); // FIX: Blocks from freelist were decremented when freed, must increment when allocated ss_active_add(tls->ss, from_freelist); extern unsigned long long g_rf_freelist_items[]; g_rf_freelist_items[class_idx] += from_freelist; total_taken += from_freelist; want -= from_freelist; if (want == 0) break; } // === Linear Carve (P0 Key Optimization!) === if (meta->used >= meta->capacity) { // Slab exhausted, try to get another if (superslab_refill(class_idx) == NULL) break; meta = tls->meta; if (!meta) break; continue; } uint32_t available = meta->capacity - meta->used; uint32_t batch = want; if (batch > available) batch = available; if (batch == 0) break; // Get slab base uint8_t* slab_base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx); TinyRefillChain carve; trc_linear_carve(slab_base, bs, meta, batch, &carve); trc_splice_to_sll(class_idx, &carve, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]); // FIX: Update SuperSlab active counter (was missing!) ss_active_add(tls->ss, batch); extern unsigned long long g_rf_carve_items[]; g_rf_carve_items[class_idx] += batch; total_taken += batch; want -= batch; } #if HAKMEM_DEBUG_COUNTERS // Track successful SLL refills from SuperSlab (compile-time gated) // NOTE: Increment unconditionally to verify counter is working g_rf_hit_slab[class_idx]++; #endif return total_taken; } #endif // HAKMEM_TINY_REFILL_P0_INC_H