Key changes: - Phase 83-1: Switch dispatch fixed mode (tiny_inline_slots_switch_dispatch_fixed_box) - NO-GO (marginal +0.32%, branch reduction negligible) Reason: lazy-init pattern already optimal, Phase 78-1 pattern shows diminishing returns - Allocator comparison baseline update (10-run SSOT, WS=400, ITERS=20M): tcmalloc: 115.26M (92.33% of mimalloc) jemalloc: 97.39M (77.96% of mimalloc) system: 85.20M (68.24% of mimalloc) mimalloc: 124.82M (baseline) - hakmem PROFILE correction: scripts/run_mixed_10_cleanenv.sh + run_allocator_quick_matrix.sh PROFILE explicitly set to MIXED_TINYV3_C7_SAFE for hakmem measurements Result: baseline stabilized to 55.53M (44.46% of mimalloc) Previous unstable measurement (35.57M) was due to profile leak - Documentation: * PERFORMANCE_TARGETS_SCORECARD.md: Reference allocators + M1/M2 milestone status * PHASE83_1_SWITCH_DISPATCH_FIXED_RESULTS.md: Phase 83-1 analysis (NO-GO) * ALLOCATOR_COMPARISON_QUICK_RUNBOOK.md: Quick comparison procedure * ALLOCATOR_COMPARISON_SSOT.md: Detailed SSOT methodology - M2 milestone status: 44.46% (target 55%, gap -10.54pp) - structural improvements needed 🤖 Generated with Claude Code Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
90 lines
3.4 KiB
C
90 lines
3.4 KiB
C
// tiny_c6_inline_slots.h - Phase 75-1: C6 Inline Slots Fast-Path API
|
|
//
|
|
// Goal: Zero-overhead fast-path API for C6 inline slot operations
|
|
// Scope: C6 class only (57.2% of C4-C7 operations in Mixed SSOT)
|
|
// Design: Always-inline, fail-fast to unified_cache on FULL/empty
|
|
//
|
|
// Performance Target:
|
|
// - Push: 1-2 cycles (full check + ring index update)
|
|
// - Pop: 1-2 cycles (ring index update, null check)
|
|
// - Fallback: Silent delegation to unified_cache (existing path)
|
|
//
|
|
// Integration Points:
|
|
// - Alloc: Try c6_inline_pop() first, fallback to unified_cache_pop()
|
|
// - Free: Try c6_inline_push() first, fallback to unified_cache_push()
|
|
//
|
|
// Safety:
|
|
// - Caller must check c6_inline_ready() (defined below) before calling push/pop
|
|
// - Caller must handle NULL return (pop) or full condition (push)
|
|
// - No internal validation beyond the full/empty check (fail-fast design)
|
|
|
|
#ifndef HAK_FRONT_TINY_C6_INLINE_SLOTS_H
|
|
#define HAK_FRONT_TINY_C6_INLINE_SLOTS_H
|
|
|
|
#include <stdint.h>
|
|
#include "../box/tiny_c6_inline_slots_env_box.h"
|
|
#include "../box/tiny_c6_inline_slots_tls_box.h"
|
|
#include "../box/tiny_inline_slots_fixed_mode_box.h"
|
|
|
|
// ============================================================================
|
|
// Fast-Path API (always_inline for zero call overhead)
|
|
// ============================================================================
|
|
|
|
// Push to C6 inline slots (free path)
|
|
// Returns: 1 on success, 0 if full (caller must fallback to unified_cache)
|
|
// Precondition: ptr is valid BASE pointer for C6 class
|
|
__attribute__((always_inline))
|
|
static inline int c6_inline_push(TinyC6InlineSlots* slots, void* ptr) {
|
|
// Full check (single branch, likely taken in steady state)
|
|
if (__builtin_expect(c6_inline_full(slots), 0)) {
|
|
return 0; // Full, caller must fallback
|
|
}
|
|
|
|
// Push to tail (FIFO producer)
|
|
slots->slots[slots->tail] = ptr;
|
|
slots->tail = (slots->tail + 1) % TINY_C6_INLINE_CAPACITY;
|
|
|
|
return 1; // Success
|
|
}
|
|
|
|
// Pop from C6 inline slots (alloc path)
|
|
// Returns: BASE pointer on success, NULL if empty (caller must fallback to unified_cache)
|
|
// Precondition: slots is initialized and enabled
|
|
__attribute__((always_inline))
|
|
static inline void* c6_inline_pop(TinyC6InlineSlots* slots) {
|
|
// Empty check (single branch, likely NOT taken in steady state)
|
|
if (__builtin_expect(c6_inline_empty(slots), 0)) {
|
|
return NULL; // Empty, caller must fallback
|
|
}
|
|
|
|
// Pop from head (FIFO consumer)
|
|
void* ptr = slots->slots[slots->head];
|
|
slots->head = (slots->head + 1) % TINY_C6_INLINE_CAPACITY;
|
|
|
|
return ptr; // BASE pointer (caller converts to USER)
|
|
}
|
|
|
|
// ============================================================================
|
|
// Integration Helpers (for malloc_tiny_fast.h integration)
|
|
// ============================================================================
|
|
|
|
// Get TLS instance (wraps extern TLS variable)
|
|
static inline TinyC6InlineSlots* c6_inline_tls(void) {
|
|
return &g_tiny_c6_inline_slots;
|
|
}
|
|
|
|
// Check if C6 inline is enabled AND initialized (combined gate)
|
|
// Returns: 1 if ready to use, 0 if disabled or uninitialized
|
|
static inline int c6_inline_ready(void) {
|
|
if (!tiny_c6_inline_slots_enabled_fast()) {
|
|
return 0;
|
|
}
|
|
|
|
// TLS init check (once per thread)
|
|
// Note: In production, this check can be eliminated if TLS init is guaranteed
|
|
TinyC6InlineSlots* slots = c6_inline_tls();
|
|
return (slots->slots != NULL || slots->head == 0); // Initialized if zero or non-null
|
|
}
|
|
|
|
#endif // HAK_FRONT_TINY_C6_INLINE_SLOTS_H
|