Files
hakmem/core/box/tiny_tcache_box.h
Moe Charm (CI) f8fb05bc13 Phase 14 v1: Pointer-Chase Reduction (tcache) NEUTRAL (+0.20%)
Implementation:
- Intrusive LIFO tcache layer (L1) before UnifiedCache
- TLS per-class bins (head pointer + count)
- Intrusive next pointers (via tiny_next_store/load SSOT)
- Cap: 64 blocks per class (default)
- ENV: HAKMEM_TINY_TCACHE=0/1 (default: 0, OFF)

A/B Test Results (Mixed 10-run):
- Baseline (TCACHE=0): 51,083,379 ops/s
- Optimized (TCACHE=1): 51,186,838 ops/s
- Mean delta: +0.20% (below +1.0% GO threshold)
- Median delta: +0.59%

Verdict: NEUTRAL - Freeze as research box (default OFF)

Root Cause (v1 wiring incomplete):
- Free side pushes to tcache via unified_cache_push()
- Alloc hot path (tiny_hot_alloc_fast) doesn't consume tcache
- tcache becomes "sink" without alloc-side pop → ROI not measurable

Files:
- Created: core/box/tiny_tcache_{env_box,box}.h, tiny_tcache_env_box.c
- Modified: core/front/tiny_unified_cache.h (integration)
- Modified: core/bench_profile.h (refresh sync)
- Modified: Makefile (build integration)
- Results: docs/analysis/PHASE14_POINTER_CHASE_REDUCTION_1_AB_TEST_RESULTS.md
- v2 Instructions: docs/analysis/PHASE14_POINTER_CHASE_REDUCTION_2_NEXT_INSTRUCTIONS.md

Next: Phase 14 v2 (connect tcache to tiny_front_hot_box alloc/free hot path)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-15 01:28:50 +09:00

163 lines
5.1 KiB
C

// ============================================================================
// Phase 14 v1: Tiny tcache Box (L1) - Intrusive LIFO Cache
// ============================================================================
//
// Purpose: Per-class intrusive LIFO cache (tcache-style)
//
// Design: docs/analysis/PHASE14_POINTER_CHASE_REDUCTION_1_DESIGN.md
//
// Strategy:
// - Thread-local head pointers + count per class
// - Intrusive next pointers stored in blocks (via tiny_next_store/load)
// - Cap-limited LIFO (no overflow, delegate to UnifiedCache)
// - Hit path: O(1) pointer operations only (no array access)
//
// Invariants:
// - Only BASE pointers stored (never USER pointers)
// - count <= cap always
// - One block in tcache OR unified_cache, never both
// - Next pointer via tiny_next_store/load SSOT only
//
// API:
// tiny_tcache_try_push(class_idx, base) -> bool (handled?)
// tiny_tcache_try_pop(class_idx) -> void* (BASE or NULL)
//
// Safety:
// - Debug: assert count <= cap
// - Debug: assert base != NULL and reasonable range
// - Release: fast path, no checks
//
// ============================================================================
#ifndef TINY_TCACHE_BOX_H
#define TINY_TCACHE_BOX_H
#include <stdint.h>
#include <stdbool.h>
#include <assert.h>
#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_config.h" // TINY_NUM_CLASSES
#include "../tiny_nextptr.h" // tiny_next_store/load SSOT
#include "tiny_tcache_env_box.h" // tiny_tcache_enabled/cap
// ============================================================================
// TLS State (per-thread, per-class)
// ============================================================================
typedef struct {
void* head; // BASE pointer to first block (or NULL)
uint16_t count; // Number of blocks in this tcache
} TinyTcacheBin;
// Thread-local storage: 8 classes (C0-C7)
static __thread TinyTcacheBin g_tiny_tcache_bins[TINY_NUM_CLASSES];
// ============================================================================
// Push (try to add block to tcache)
// ============================================================================
//
// Arguments:
// class_idx - Tiny class index (0-7)
// base - BASE pointer to freed block
//
// Returns:
// true - Block accepted into tcache
// false - Tcache full (overflow), caller should use unified_cache
//
// Side effects:
// - Writes intrusive next pointer into block (via tiny_next_store)
// - Updates head and count
//
static inline bool tiny_tcache_try_push(int class_idx, void* base) {
// ENV gate check (cached, should be fast)
if (!tiny_tcache_enabled()) {
return false; // Tcache disabled, fall through to unified_cache
}
TinyTcacheBin* bin = &g_tiny_tcache_bins[class_idx];
uint16_t cap = tiny_tcache_cap();
// Check capacity
if (bin->count >= cap) {
return false; // Overflow, delegate to unified_cache
}
// Debug: validate base pointer
#if !HAKMEM_BUILD_RELEASE
if (base == NULL || (uintptr_t)base < 4096) {
fprintf(stderr, "[TINY_TCACHE] BUG: invalid base=%p in try_push (class=%d)\n", base, class_idx);
abort();
}
#endif
// LIFO push: link block to current head
tiny_next_store(base, class_idx, bin->head);
bin->head = base;
bin->count++;
// Debug: check invariant
#if !HAKMEM_BUILD_RELEASE
assert(bin->count <= cap);
#endif
return true; // Block accepted
}
// ============================================================================
// Pop (try to get block from tcache)
// ============================================================================
//
// Arguments:
// class_idx - Tiny class index (0-7)
//
// Returns:
// BASE pointer - Block from tcache (LIFO order)
// NULL - Tcache empty, caller should use unified_cache
//
// Side effects:
// - Reads intrusive next pointer from block (via tiny_next_load)
// - Updates head and count
//
static inline void* tiny_tcache_try_pop(int class_idx) {
// ENV gate check (cached, should be fast)
if (!tiny_tcache_enabled()) {
return NULL; // Tcache disabled, fall through to unified_cache
}
TinyTcacheBin* bin = &g_tiny_tcache_bins[class_idx];
// Check if empty
if (bin->head == NULL) {
return NULL; // Miss, delegate to unified_cache
}
// LIFO pop: unlink head
void* base = bin->head;
void* next = tiny_next_load(base, class_idx);
bin->head = next;
bin->count--;
// Debug: validate popped pointer
#if !HAKMEM_BUILD_RELEASE
if (base == NULL || (uintptr_t)base < 4096) {
fprintf(stderr, "[TINY_TCACHE] BUG: invalid base=%p in try_pop (class=%d)\n", base, class_idx);
abort();
}
#endif
return base; // Hit (BASE pointer)
}
// ============================================================================
// Stats (optional, for diagnostics)
// ============================================================================
// Get current count for a class (debug/stats only)
static inline uint16_t tiny_tcache_count(int class_idx) {
return g_tiny_tcache_bins[class_idx].count;
}
#endif // TINY_TCACHE_BOX_H