Files
hakmem/core/box/tiny_front_cold_box.h
Moe Charm (CI) 0c0d9c8c0b Unify Unified Cache API to BASE-only pointer type with Phantom typing
Core Changes:
- Modified: core/front/tiny_unified_cache.h
  * API signatures changed to use hak_base_ptr_t (Phantom type)
  * unified_cache_pop() returns hak_base_ptr_t (was void*)
  * unified_cache_push() accepts hak_base_ptr_t base (was void*)
  * unified_cache_pop_or_refill() returns hak_base_ptr_t (was void*)
  * Added #include "../box/ptr_type_box.h" for Phantom types

- Modified: core/front/tiny_unified_cache.c
  * unified_cache_refill() return type changed to hak_base_ptr_t
  * Uses HAK_BASE_FROM_RAW() for wrapping return values
  * Uses HAK_BASE_TO_RAW() for unwrapping parameters
  * Maintains internal void* storage in slots array

- Modified: core/box/tiny_front_cold_box.h
  * Uses hak_base_ptr_t from unified_cache_refill()
  * Uses hak_base_is_null() for NULL checks
  * Maintains tiny_user_offset() for BASE→USER conversion
  * Cold path refill integration updated to Phantom types

- Modified: core/front/malloc_tiny_fast.h
  * Free path wraps BASE pointer with HAK_BASE_FROM_RAW()
  * When pushing to Unified Cache via unified_cache_push()

Design Rationale:
- Unified Cache API now exclusively handles BASE pointers (no USER mixing)
- Phantom types enforce type distinction at compile time (debug mode)
- Zero runtime overhead in Release mode (macros expand to identity)
- Hot paths (tiny_hot_alloc_fast, tiny_hot_free_fast) remain unchanged
- Layout consistency maintained via tiny_user_offset() Box

Validation:
- All 25 Phantom type usage sites verified (25/25 correct)
- HAK_BASE_FROM_RAW(): 5/5 correct wrappings
- HAK_BASE_TO_RAW(): 1/1 correct unwrapping
- hak_base_is_null(): 4/4 correct NULL checks
- Compilation: RELEASE=0 and RELEASE=1 both successful
- Smoke tests: 3/3 passed (simple_alloc, loop 10M, pool_tls)

Type Safety Benefits:
- Prevents USER/BASE pointer confusion at API boundaries
- Compile-time checking in debug builds via Phantom struct
- Zero cost abstraction in release builds
- Clear intent: Unified Cache exclusively stores BASE pointers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 12:20:21 +09:00

176 lines
6.5 KiB
C

// tiny_front_cold_box.h - Phase 4-Step2: Tiny Front Cold Path Box
// Purpose: Slow path allocation (refill, diagnostics, error handling)
// Contract: Called on cache miss, handles SuperSlab refill + diagnostics
// Performance: Optimized for correctness, not speed (noinline, cold)
//
// Design Principles (Box Pattern):
// 1. Single Responsibility: Cold path ONLY (refill, errors, diagnostics)
// 2. Clear Contract: Returns USER pointer or NULL, handles all edge cases
// 3. Observable: Debug logging, error reporting, telemetry
// 4. Safe: Full error checking, defensive programming
// 5. Testable: Isolated from hot path, easy to test edge cases
//
// Performance Impact:
// - noinline: Keeps hot path small (better i-cache locality)
// - cold attribute: Hints compiler to optimize for size, not speed
// - Infrequent execution: Called only on cache miss (~1-5% of allocations)
#ifndef TINY_FRONT_COLD_BOX_H
#define TINY_FRONT_COLD_BOX_H
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_config.h"
#include "../tiny_region_id.h"
#include "../front/tiny_unified_cache.h" // For TinyUnifiedCache, unified_cache_refill
#include "tiny_layout_box.h" // For tiny_user_offset()
// ============================================================================
// Box 3: Tiny Cold Refill + Alloc
// ============================================================================
// Refill cache from SuperSlab + allocate one object
//
// CONTRACT:
// Input: class_idx (0-7, pre-validated by caller)
// Output: USER pointer on success, NULL on failure
// Precondition: Cache miss detected by hot path
// Postcondition: Cache refilled (if possible), one object allocated
//
// DESIGN:
// - noinline: Keeps hot path small (better i-cache)
// - cold: Hints compiler this is infrequent code
// - Defensive: Full error checking, diagnostics
//
// PERFORMANCE:
// - Called infrequently (~1-5% of allocations)
// - Optimized for correctness, not speed
// - Refill amortizes cost over batch (e.g., 64 objects)
//
// ERROR HANDLING:
// - SuperSlab allocation failure → NULL
// - Cache refill failure → NULL (fallback to normal path)
// - Logs errors in debug builds only (rate-limited: first 10 failures per class, per thread)
//
// Cold-path allocation: refill the Unified Cache for `class_idx` and hand out
// the first refilled block.
//
// Parameters:
//   class_idx - tiny size-class index; used unchecked as an array index in
//               debug builds, so the caller must have validated it.
// Returns:
//   Pointer to hand to the caller on success, or NULL when
//   unified_cache_refill() yields a null BASE pointer.
__attribute__((noinline, cold))
static inline void* tiny_cold_refill_and_alloc(int class_idx) {
    // unified_cache_refill() returns the first block as a wrapped BASE pointer.
    hak_base_ptr_t first_block = unified_cache_refill(class_idx);

    if (!hak_base_is_null(first_block)) {
        // Success path. No header is written here; per the existing note in
        // this file, unified_cache_refill() has already written it.
        void* block_start = HAK_BASE_TO_RAW(first_block);
#if HAKMEM_TINY_HEADER_CLASSIDX
        // Header build: USER pointer = BASE + per-class offset from the
        // centralized layout API.
        return (void*)((char*)block_start + tiny_user_offset(class_idx));
#else
        // No class-index header: the raw BASE pointer is returned as-is.
        return block_start;
#endif
    }

    // Failure path: refill produced no block.
#if !HAKMEM_BUILD_RELEASE
    // Debug builds only: log at most 10 failures per class, per thread, so a
    // persistent failure does not flood stderr.
    static __thread uint64_t s_fail_reports[TINY_NUM_CLASSES] = {0};
    if (s_fail_reports[class_idx] < 10) {
        fprintf(stderr, "[COLD_BOX] Refill failed: class_idx=%d\n", class_idx);
        fflush(stderr);
        s_fail_reports[class_idx]++;
    }
#endif
    return NULL;
}
// ============================================================================
// Box 3b: Tiny Cold Drain + Free
// ============================================================================
// Drain cache to SuperSlab + free one object
//
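// CONTRACT:
// Input: class_idx (0-7), base pointer (BASE, not USER)
// Output: 1=SUCCESS, 0=FAILURE (the current stub always returns 0)
// Precondition: Cache full detected by hot path
// Postcondition (intended): Cache drained (if possible), object freed
// NOTE: Batch drain is not implemented yet. The object is NOT freed here;
//       the caller must fall back to the normal free path when 0 is returned.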
//
// DESIGN:
// - noinline: Keeps hot path small
// - cold: Infrequent execution
// - Batch drain: Drain multiple objects to amortize cost
//
// PERFORMANCE:
// - Called infrequently (~1-5% of frees)
// - Batch drain amortizes cost (e.g., drain 32 objects)
//
// Cold-path free: placeholder for "drain the Unified Cache, then free `base`".
//
// The batch drain is not implemented yet. This function never frees `base`
// and always returns 0, so the caller must route the free through its normal
// path. In debug builds it logs the cache's tail/head indices, at most 10
// times per class per thread.
//
// Parameters:
//   class_idx - tiny size-class index; used unchecked as an array index in
//               debug builds, so the caller must have validated it.
//   base      - block to free (currently unused).
// Returns:
//   0 always (free was not handled here).
__attribute__((noinline, cold))
static inline int tiny_cold_drain_and_free(int class_idx, void* base) {
    // TODO: Implement batch drain logic
    // For now, just reject the free (caller falls back to normal path)
#if !HAKMEM_BUILD_RELEASE
    // The cache is only inspected for diagnostics, so it is looked up inside
    // the debug block; release builds then have no unused local.
    extern __thread TinyUnifiedCache g_unified_cache[];
    static __thread uint64_t g_drain_count[TINY_NUM_CLASSES] = {0};
    if (g_drain_count[class_idx] < 10) {
        const TinyUnifiedCache* cache = &g_unified_cache[class_idx];
        // Explicit casts keep the arguments matched to "%u" whatever unsigned
        // integer width the tail/head fields have (their declaration is not
        // visible in this header).
        fprintf(stderr, "[COLD_BOX] Cache full, drain needed: class_idx=%d tail=%u head=%u\n",
                class_idx, (unsigned)cache->tail, (unsigned)cache->head);
        fflush(stderr);
        g_drain_count[class_idx]++;
    }
#else
    (void)class_idx; // Only needed for diagnostics until drain is implemented
#endif
    // Fallback: Return 0 (caller handles via normal free path)
    (void)base; // Unused for now
    return 0;
}
// ============================================================================
// Box 3c: Tiny Cold Error Reporting
// ============================================================================
// Report error (debug builds only)
//
// CONTRACT:
// Input: class_idx, error reason string
// Output: void (logs to stderr)
// Precondition: Error detected in hot/cold path
// Postcondition: Error logged (debug only, zero overhead in release)
//
// Report an error detected on the hot or cold path.
//
// Debug builds write one line to stderr and flush it; release builds compile
// this down to a no-op.
//
// Parameters:
//   class_idx - size-class index, printed verbatim (not used as an index).
//   reason    - human-readable description; may be NULL, in which case the
//               placeholder "(null)" is printed.
__attribute__((noinline, cold))
static inline void tiny_cold_report_error(int class_idx, const char* reason) {
#if !HAKMEM_BUILD_RELEASE
    // Passing a NULL pointer for "%s" is undefined behavior, and an error
    // reporter must not itself crash, so substitute a placeholder.
    const char* text = (reason != NULL) ? reason : "(null)";
    fprintf(stderr, "[COLD_BOX_ERROR] class_idx=%d reason=%s\n", class_idx, text);
    fflush(stderr);
#else
    (void)class_idx;
    (void)reason;
#endif
}
// ============================================================================
// Performance Notes
// ============================================================================
// Cold path optimizations:
// 1. noinline: Reduces hot path code size → better i-cache
// 2. cold attribute: Compiler optimizes for size, not speed
// 3. Batch operations: Refill/drain multiple objects (amortize cost)
// 4. Defensive code: Full error checking (correctness > speed)
//
// Expected call frequency:
// - Refill: ~1-5% of allocations (depends on cache size)
// - Drain: ~1-5% of frees (depends on allocation pattern)
// - Error: <0.01% (only on actual errors)
//
// Impact on hot path:
// - Hot path stays small (~10-20 instructions)
// - Better i-cache locality (hot path doesn't include cold code)
// - CPU branch predictor learns hot path quickly
#endif // TINY_FRONT_COLD_BOX_H