// tiny_heap_v2.h - Tiny per-thread heap (Front-V2, tcache-like) // Goal: // - 1 レイヤの TLS magazine を前段に置き、FastCache/SFC 等をバイパス。 // - ENV で A/B 切り替え可能(デフォルト OFF)。戻しやすく安全に。 // - 対象は C0–C3 のみ。Magazine が空なら SLL→SS 経由で補充。 #ifndef HAK_FRONT_TINY_HEAP_V2_H #define HAK_FRONT_TINY_HEAP_V2_H #include "../hakmem_tiny.h" #include "../box/tls_sll_box.h" #include "../hakmem_env_cache.h" #ifndef TINY_FRONT_TLS_SLL_ENABLED #define HAK_TINY_TLS_SLL_ENABLED_FALLBACK 1 #else #define HAK_TINY_TLS_SLL_ENABLED_FALLBACK TINY_FRONT_TLS_SLL_ENABLED #endif #ifndef TINY_FRONT_HEAP_V2_ENABLED #define HAK_TINY_HEAP_V2_ENABLED_FALLBACK tiny_heap_v2_enabled() #else #define HAK_TINY_HEAP_V2_ENABLED_FALLBACK TINY_FRONT_HEAP_V2_ENABLED #endif #include #include // Phase 13-B: Magazine capacity (same as Phase 13-A) #ifndef TINY_HEAP_V2_MAG_CAP #define TINY_HEAP_V2_MAG_CAP 16 #endif // TinyHeapV2 Magazine (per-thread, per-class) typedef struct { void* items[TINY_HEAP_V2_MAG_CAP]; int top; } TinyHeapV2Mag; // TinyHeapV2 Statistics (per-thread, per-class) typedef struct { uint64_t alloc_calls; uint64_t mag_hits; uint64_t refill_calls; uint64_t refill_blocks; uint64_t backend_oom; } TinyHeapV2Stats; // External TLS variables (defined in hakmem_tiny.c) extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[TINY_NUM_CLASSES]; extern __thread TinyHeapV2Stats g_tiny_heap_v2_stats[TINY_NUM_CLASSES]; extern __thread int g_tls_heap_v2_initialized; // Backend refill helpers (implemented in Tiny refill path) int sll_refill_small_from_ss(int class_idx, int max_take); int sll_refill_batch_from_ss(int class_idx, int max_take); // Enable flag (cached) // ENV: HAKMEM_TINY_FRONT_V2 // - 0 (default): OFF // - 1: ON (Front-V2 有効化、FastCache/SFC を経由せず magazine を先頭に) static inline int tiny_heap_v2_enabled(void) { static int g_enable = -1; if (__builtin_expect(g_enable == -1, 0)) { const char* e = getenv("HAKMEM_TINY_FRONT_V2"); g_enable = (e && *e && *e != '0') ? 1 : 0; } return g_enable; } // Class-specific enable mask (cached) // ENV: HAKMEM_TINY_HEAP_V2_CLASS_MASK (bitmask: bit 0=C0, bit 1=C1, bit 2=C2, bit 3=C3) // Default: 0xE (C1-C3 only, skip C0 8B due to -5% regression) // Example: 0x2 = C1 only, 0x8 = C3 only, 0x6 = C1+C2, 0xF = all C0-C3 static inline int tiny_heap_v2_class_enabled(int class_idx) { static int g_class_mask = -1; if (__builtin_expect(g_class_mask == -1, 0)) { const char* e = getenv("HAKMEM_TINY_HEAP_V2_CLASS_MASK"); if (e && *e) { // Parse hex or decimal char* endptr; long val = strtol(e, &endptr, 0); // 0 = auto-detect base (0x for hex, else decimal) g_class_mask = (int)val; } else { g_class_mask = 0xE; // Default: C1-C3 (16/32/64B), skip C0 8B (-5% regression) } } if (class_idx < 0 || class_idx >= 8) return 0; return (g_class_mask & (1 << class_idx)) != 0; } // Leftover mode flag (cached) // ENV: HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE // - 0 (default): L0 gets blocks first ("stealing" design, +18% @ 32B) // - 1: L1 primary owner, L0 gets leftovers ("leftover" design, Box-clean but -5% @ 16B) // // Decision (Phase 13-B): Default to Mode 0 (Stealing) for performance // Rationale (ChatGPT analysis): // - Learning layer primarily observes Superslab/Pool statistics // - L0 stealing doesn't corrupt Superslab carving/drain signals // - If needed, add TinyHeapV2 hit/miss counters to learning layer later // - Performance gain (+18% @ 32B) justifies less-strict Box boundary static inline int tiny_heap_v2_leftover_mode(void) { static int g_leftover_mode = -1; if (__builtin_expect(g_leftover_mode == -1, 0)) { const char* e = getenv("HAKMEM_TINY_HEAP_V2_LEFTOVER_MODE"); g_leftover_mode = (e && *e && *e != '0') ? 1 : 0; } return g_leftover_mode; } // NOTE: This header MUST be included AFTER tiny_alloc_fast.inc.h! // It uses fastcache_pop, tiny_alloc_fast_refill, hak_tiny_size_to_class which are // static inline functions defined in tiny_alloc_fast.inc.h and related headers. // Phase 13-A Step 2: Try to push a block into TinyHeapV2 magazine // Called from free path to supply magazine with "leftover" blocks. // Returns: 1 if pushed successfully, 0 if magazine is full static inline int tiny_heap_v2_try_push(int class_idx, void* base) { // 1. Check if class is enabled if (class_idx < 0 || class_idx > 3) return 0; if (!tiny_heap_v2_enabled()) return 0; if (!tiny_heap_v2_class_enabled(class_idx)) return 0; TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx]; // 2. Check if magazine has room if (mag->top >= TINY_HEAP_V2_MAG_CAP) { return 0; // Magazine full } // 3. Push BASE pointer into magazine mag->items[mag->top++] = base; // DEBUG: Log push events #if !HAKMEM_BUILD_RELEASE static int g_push_dbg = -1; if (g_push_dbg == -1) { const char* e = getenv("HAKMEM_TINY_HEAP_V2_DEBUG"); g_push_dbg = (e && *e && *e != '0') ? 1 : 0; } if (g_push_dbg) { static __thread int g_push_count[TINY_NUM_CLASSES] = {0}; if (g_push_count[class_idx] < 5) { fprintf(stderr, "[HeapV2-PUSH] C%d push #%d, base=%p, mag->top=%d\n", class_idx, g_push_count[class_idx]++, base, mag->top); } } #endif return 1; // Success } // Stats gate (ENV cached) static inline int tiny_heap_v2_stats_enabled(void) { return HAK_ENV_TINY_HEAP_V2_STATS(); } // TLS HeapV2 initialization barrier (ensures mag->top is zero on first use) static inline void tiny_heap_v2_ensure_init(void) { extern __thread int g_tls_heap_v2_initialized; extern __thread TinyHeapV2Mag g_tiny_heap_v2_mag[]; if (__builtin_expect(!g_tls_heap_v2_initialized, 0)) { for (int i = 0; i < TINY_NUM_CLASSES; i++) { g_tiny_heap_v2_mag[i].top = 0; } g_tls_heap_v2_initialized = 1; } } // Magazine refill from TLS SLL/backend static inline int tiny_heap_v2_refill_mag(int class_idx) { // FIX: Ensure TLS is initialized before first magazine access tiny_heap_v2_ensure_init(); if (class_idx < 0 || class_idx > 3) return 0; if (!tiny_heap_v2_class_enabled(class_idx)) return 0; // Phase 7-Step7: Use config macro for dead code elimination in PGO mode if (!HAK_TINY_TLS_SLL_ENABLED_FALLBACK) return 0; TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx]; const int cap = TINY_HEAP_V2_MAG_CAP; int filled = 0; // FIX: Validate mag->top before use (prevent uninitialized TLS corruption) if (mag->top < 0 || mag->top > cap) { static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0}; if (!s_reset_logged[class_idx]) { fprintf(stderr, "[HEAP_V2_REFILL] C%d mag->top=%d corrupted, reset to 0\n", class_idx, mag->top); s_reset_logged[class_idx] = 1; } mag->top = 0; } // First, steal from TLS SLL if already available. while (mag->top < cap) { void* base = NULL; if (!tls_sll_pop(class_idx, &base)) break; mag->items[mag->top++] = base; filled++; } // If magazine is still empty, ask backend to refill SLL once, then steal again. if (mag->top < cap && filled == 0) { #if HAKMEM_TINY_P0_BATCH_REFILL (void)sll_refill_batch_from_ss(class_idx, cap); #else (void)sll_refill_small_from_ss(class_idx, cap); #endif while (mag->top < cap) { void* base = NULL; if (!tls_sll_pop(class_idx, &base)) break; mag->items[mag->top++] = base; filled++; } } if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) { if (filled > 0) { g_tiny_heap_v2_stats[class_idx].refill_calls++; g_tiny_heap_v2_stats[class_idx].refill_blocks += (uint64_t)filled; } } return filled; } // Magazine pop (fast path) static inline void* tiny_heap_v2_alloc_by_class(int class_idx) { // FIX: Ensure TLS is initialized before first magazine access tiny_heap_v2_ensure_init(); if (class_idx < 0 || class_idx > 3) return NULL; // Phase 7-Step8: Use config macro for dead code elimination in PGO mode if (!HAK_TINY_HEAP_V2_ENABLED_FALLBACK) return NULL; if (!tiny_heap_v2_class_enabled(class_idx)) return NULL; TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[class_idx]; // Hit: magazine has entries if (__builtin_expect(mag->top > 0, 1)) { // FIX: Add underflow protection before array access const int cap = TINY_HEAP_V2_MAG_CAP; if (mag->top > cap || mag->top < 0) { static __thread int s_reset_logged[TINY_NUM_CLASSES] = {0}; if (!s_reset_logged[class_idx]) { fprintf(stderr, "[HEAP_V2_ALLOC] C%d mag->top=%d corrupted, reset to 0\n", class_idx, mag->top); s_reset_logged[class_idx] = 1; } mag->top = 0; return NULL; // Fall through to refill path } if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) { g_tiny_heap_v2_stats[class_idx].alloc_calls++; g_tiny_heap_v2_stats[class_idx].mag_hits++; } return mag->items[--mag->top]; } // Miss: try single refill from SLL/backend int filled = tiny_heap_v2_refill_mag(class_idx); if (filled > 0 && mag->top > 0) { if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) { g_tiny_heap_v2_stats[class_idx].alloc_calls++; g_tiny_heap_v2_stats[class_idx].mag_hits++; } return mag->items[--mag->top]; } if (__builtin_expect(tiny_heap_v2_stats_enabled(), 0)) { g_tiny_heap_v2_stats[class_idx].backend_oom++; } return NULL; } // Print statistics (called at program exit if HAKMEM_TINY_HEAP_V2_STATS=1, impl in hakmem_tiny.c) void tiny_heap_v2_print_stats(void); #endif // HAK_FRONT_TINY_HEAP_V2_H