// tiny_front_cold_box.h - Phase 4-Step2: Tiny Front Cold Path Box
// Purpose:     Slow path allocation (refill, diagnostics, error handling)
// Contract:    Called on cache miss, handles SuperSlab refill + diagnostics
// Performance: Optimized for correctness, not speed (noinline, cold)
//
// Design Principles (Box Pattern):
//   1. Single Responsibility: Cold path ONLY (refill, errors, diagnostics)
//   2. Clear Contract: Returns USER pointer or NULL, handles all edge cases
//   3. Observable: Debug logging, error reporting, telemetry
//   4. Safe: Full error checking, defensive programming
//   5. Testable: Isolated from hot path, easy to test edge cases
//
// Performance Impact:
//   - noinline: Keeps hot path small (better i-cache locality)
//   - cold attribute: Hints compiler to optimize for size, not speed
//   - Infrequent execution: Called only on cache miss (~1-5% of allocations)

#ifndef TINY_FRONT_COLD_BOX_H
#define TINY_FRONT_COLD_BOX_H

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_config.h"
#include "../tiny_region_id.h"
#include "../front/tiny_unified_cache.h"  // For TinyUnifiedCache, unified_cache_refill

// ============================================================================
// Box 3: Tiny Cold Refill + Alloc
// ============================================================================
// Refill cache from SuperSlab + allocate one object
//
// CONTRACT:
//   Input:         class_idx (0-7, pre-validated by caller)
//   Output:        USER pointer on success, NULL on failure
//   Precondition:  Cache miss detected by hot path
//   Postcondition: Cache refilled (if possible), one object allocated
//
// DESIGN:
//   - noinline: Keeps hot path small (better i-cache)
//   - cold: Hints compiler this is infrequent code
//   - Defensive: Full error checking, diagnostics
//
// PERFORMANCE:
//   - Called infrequently (~1-5% of allocations)
//   - Optimized for correctness, not speed
//   - Refill amortizes cost over batch (e.g., 64 objects)
//
// ERROR HANDLING:
//   - SuperSlab allocation failure → NULL
//   - Cache refill failure → NULL (fallback to normal path)
//   - Logs errors in debug builds
//
__attribute__((noinline, cold))
static inline void* tiny_cold_refill_and_alloc(int class_idx) {
    // Refill cache from SuperSlab (batch allocation)
    // unified_cache_refill() returns the first block directly
    void* base = unified_cache_refill(class_idx);

    if (base == NULL) {
        // Refill failed (SuperSlab allocation error, or cache disabled)
#if !HAKMEM_BUILD_RELEASE
        static __thread uint64_t g_refill_fail_count[TINY_NUM_CLASSES] = {0};
        if (g_refill_fail_count[class_idx] < 10) {
            fprintf(stderr, "[COLD_BOX] Refill failed: class_idx=%d\n", class_idx);
            fflush(stderr);
            g_refill_fail_count[class_idx]++;
        }
#endif
        return NULL;
    }

    // Success: return USER pointer
    // NOTE: Header already written by unified_cache_refill()
    //       (Removed redundant tiny_region_id_write_header() - P2 fix)
#if HAKMEM_TINY_HEADER_CLASSIDX
    return (void*)((char*)base + 1);  // USER pointer
#else
    return base;
#endif
}
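// ============================================================================
// Usage Sketch (illustrative, not part of the box contract)
// ============================================================================
// A minimal sketch of how a front-end caller might fall back to this box on a
// cache miss. tiny_alloc_cold_fallback_example() is a hypothetical name used
// only for exposition; the real hot path lives in the front-end, not here.
//
static inline void* tiny_alloc_cold_fallback_example(int class_idx) {
    // Hot path (not shown) found no cached block, so take the cold path:
    // refill the unified cache from the SuperSlab and allocate one object.
    void* user = tiny_cold_refill_and_alloc(class_idx);
    if (user == NULL) {
        // Refill failed (SuperSlab exhausted or cache disabled); the caller
        // is expected to fall through to its normal allocation path.
        return NULL;
    }
    return user;  // USER pointer (header already written during refill)
}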
// ============================================================================
// Box 3b: Tiny Cold Drain + Free
// ============================================================================
// Drain cache to SuperSlab + free one object
//
// CONTRACT:
//   Input:         class_idx (0-7), base pointer (BASE, not USER)
//   Output:        1=SUCCESS, 0=FAILURE
//   Precondition:  Cache full detected by hot path
//   Postcondition: Cache drained (if possible), object freed
//
// DESIGN:
//   - noinline: Keeps hot path small
//   - cold: Infrequent execution
//   - Batch drain: Drain multiple objects to amortize cost
//
// PERFORMANCE:
//   - Called infrequently (~1-5% of frees)
//   - Batch drain amortizes cost (e.g., drain 32 objects)
//
__attribute__((noinline, cold))
static inline int tiny_cold_drain_and_free(int class_idx, void* base) {
    extern __thread TinyUnifiedCache g_unified_cache[];
    TinyUnifiedCache* cache = &g_unified_cache[class_idx];

    // TODO: Implement batch drain logic (an illustrative sketch follows the
    //       include guard at the end of this file).
    // For now, just reject the free (caller falls back to normal path).

#if !HAKMEM_BUILD_RELEASE
    static __thread uint64_t g_drain_count[TINY_NUM_CLASSES] = {0};
    if (g_drain_count[class_idx] < 10) {
        fprintf(stderr, "[COLD_BOX] Cache full, drain needed: class_idx=%d tail=%u head=%u\n",
                class_idx, cache->tail, cache->head);
        fflush(stderr);
        g_drain_count[class_idx]++;
    }
#endif

    // Fallback: Return 0 (caller handles via normal free path)
    (void)base;  // Unused for now
    return 0;
}

// ============================================================================
// Box 3c: Tiny Cold Error Reporting
// ============================================================================
// Report error (debug builds only)
//
// CONTRACT:
//   Input:         class_idx, error reason string
//   Output:        void (logs to stderr)
//   Precondition:  Error detected in hot/cold path
//   Postcondition: Error logged (debug only, zero overhead in release)
//
__attribute__((noinline, cold))
static inline void tiny_cold_report_error(int class_idx, const char* reason) {
#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[COLD_BOX_ERROR] class_idx=%d reason=%s\n", class_idx, reason);
    fflush(stderr);
#else
    (void)class_idx;
    (void)reason;
#endif
}

// ============================================================================
// Performance Notes
// ============================================================================
// Cold path optimizations:
//   1. noinline: Reduces hot path code size → better i-cache
//   2. cold attribute: Compiler optimizes for size, not speed
//   3. Batch operations: Refill/drain multiple objects (amortize cost)
//   4. Defensive code: Full error checking (correctness > speed)
//
// Expected call frequency:
//   - Refill: ~1-5% of allocations (depends on cache size)
//   - Drain:  ~1-5% of frees (depends on allocation pattern)
//   - Error:  <0.01% (only on actual errors)
//
// Impact on hot path:
//   - Hot path stays small (~10-20 instructions)
//   - Better i-cache locality (hot path doesn't include cold code)
//   - CPU branch predictor learns hot path quickly

#endif // TINY_FRONT_COLD_BOX_H
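// ============================================================================
// Appendix: Batch-Drain Sketch (illustrative only)
// ============================================================================
// The drain path in Box 3b is still a TODO. The block below is a minimal
// sketch of the intended design under stated assumptions: pop a fixed batch
// of cached objects and return them to the SuperSlab to amortize cost, then
// cache the object being freed. It is kept under #if 0 because
// unified_cache_pop(), unified_cache_push(), and superslab_free_block() are
// hypothetical helper names used only for exposition, and head == tail is
// assumed to mean "ring empty".
#if 0
__attribute__((noinline, cold))
static inline int tiny_cold_drain_and_free_sketch(int class_idx, void* base) {
    extern __thread TinyUnifiedCache g_unified_cache[];
    TinyUnifiedCache* cache = &g_unified_cache[class_idx];

    // Drain up to DRAIN_BATCH objects back to the SuperSlab while the ring
    // still holds entries.
    enum { DRAIN_BATCH = 32 };
    for (int i = 0; i < DRAIN_BATCH && cache->head != cache->tail; i++) {
        void* victim = unified_cache_pop(cache);     // hypothetical helper
        superslab_free_block(class_idx, victim);     // hypothetical helper
    }

    // With room freed, the object being freed can now be cached for reuse.
    unified_cache_push(cache, base);                 // hypothetical helper
    return 1;  // SUCCESS
}
#endif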