Implement C6 ULTRA intrusive LIFO freelist with ENV gating:
- Single-linked LIFO using next pointer at USER+1 offset
- tiny_next_store/tiny_next_load for pointer access (single source of truth)
- Segment learning via ss_fast_lookup (per-class seg_base/seg_end)
- ENV gate: HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL (default OFF)
- Counters: c6_ifl_push/pop/fallback in FREE_PATH_STATS

Files:
- core/box/tiny_ultra_tls_box.h: Added c6_head field for intrusive LIFO
- core/box/tiny_ultra_tls_box.c: Pop/push with intrusive branching (case 6)
- core/box/tiny_c6_ultra_intrusive_env_box.h: ENV gate (new)
- core/box/tiny_c6_intrusive_freelist_box.h: L1 pure LIFO (new)
- core/tiny_debug_ring.h: C6_IFL events
- core/box/free_path_stats_box.h/c: c6_ifl_* counters

A/B Test Results (1M iterations, ws=200, 257-512B):
- ENV_OFF (array): 56.6 Mop/s avg
- ENV_ON (intrusive): 57.6 Mop/s avg (+1.8%, within noise)
- Counters verified: c6_ifl_push=265890, c6_ifl_pop=265815, fallback=0

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
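Neither of the two new boxes is shown on this page, so as a rough orientation here is a minimal sketch of what they could look like under the description above. Everything in this sketch is illustrative, not the actual implementation: the getenv caching, the helper signatures, and the exact byte offset are assumptions; only the names (HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL, tiny_c6_ultra_intrusive_enabled, c6_ifl_push/c6_ifl_pop, tiny_next_store/tiny_next_load) and the USER+1 next-pointer placement come from the commit message.

```c
#include <stdint.h>   /* uint8_t */
#include <stdlib.h>   /* getenv */
#include <string.h>   /* memcpy */

/* ENV gate sketch: read HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL once, default OFF. */
static inline int tiny_c6_ultra_intrusive_enabled(void) {
    static int cached = -1;              /* -1 = not read yet */
    if (cached < 0) {
        const char* e = getenv("HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL");
        cached = (e && e[0] == '1') ? 1 : 0;
    }
    return cached;
}

/* Next-pointer helper sketch: the commit places the link at USER+1, which is
 * BASE+2 given the 1-byte BASE/USER header visible in
 * tiny_ultra_tls_push_slow() below. memcpy avoids alignment assumptions. */
static inline void tiny_next_store(void* base, void* next) {
    memcpy((uint8_t*)base + 2, &next, sizeof next);
}
static inline void* tiny_next_load(void* base) {
    void* next;
    memcpy(&next, (uint8_t*)base + 2, sizeof next);
    return next;
}

/* L1 pure LIFO sketch: per-thread head word, links embedded in freed blocks. */
static inline void c6_ifl_push(void** head, void* base) {
    tiny_next_store(base, *head);        /* chain old head behind new block */
    *head = base;
}
static inline void* c6_ifl_pop(void** head) {
    void* base = *head;
    if (base) {
        *head = tiny_next_load(base);    /* unlink: stored next becomes head */
    }
    return base;
}
```

The per-thread head plus the embedded links are what case 6 in the file below pushes to and pops from; no separate array storage is touched in that mode.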
212 lines · 7.9 KiB · C
// tiny_ultra_tls_box.c - Phase TLS-UNIFY-2a + TLS-UNIFY-3: Unified ULTRA TLS implementation
//
// Phase 1:  Thin wrapper delegating to per-class TLS (completed)
// Phase 2a: Unified struct with array magazines for C4-C6 (completed)
//           C7 remains in separate TinyC7Ultra box.
// Phase 3:  C6 intrusive LIFO (current) - ENV gated
//

#include "tiny_ultra_tls_box.h"
#include "tiny_c7_ultra_box.h"
#include "free_path_stats_box.h"
#include "tiny_c6_ultra_intrusive_env_box.h"  // Phase 3: ENV gate
#include "tiny_c6_intrusive_freelist_box.h"   // Phase 3: L1 box
#include "../superslab/superslab_inline.h"    // For ss_fast_lookup
#include "../tiny_debug_ring.h"               // For ring visualization

#ifndef likely
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

// ============================================================================
// Phase TLS-UNIFY-2a: Unified TLS context for C4-C6
// ============================================================================

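// NOTE: TinyUltraTlsCtx itself lives in tiny_ultra_tls_box.h. From the usage
// in this file it carries, per class C4-C6: an array magazine of BASE pointers
// (cN_freelist) with its cN_count, the learned segment window
// (cN_seg_base/cN_seg_end), and - for C6 only - the c6_head of the Phase 3
// intrusive LIFO.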
static __thread TinyUltraTlsCtx g_ultra_tls_ctx = {0};

TinyUltraTlsCtx* tiny_ultra_tls_ctx(void) {
    return &g_ultra_tls_ctx;
}

// ============================================================================
// Phase TLS-UNIFY-2a: Pop from unified TLS (C4-C6) or C7 separate box
// ============================================================================

void* tiny_ultra_tls_pop(uint8_t class_idx) {
    TinyUltraTlsCtx* ctx = &g_ultra_tls_ctx;

    switch (class_idx) {
    case 4:
        if (likely(ctx->c4_count > 0)) {
            return ctx->c4_freelist[--ctx->c4_count];
        }
        return NULL;

    case 5:
        if (likely(ctx->c5_count > 0)) {
            return ctx->c5_freelist[--ctx->c5_count];
        }
        return NULL;

    case 6:
        if (tiny_c6_ultra_intrusive_enabled()) {
            // Phase 3: intrusive LIFO
            void* base = c6_ifl_pop(&ctx->c6_head);
            if (base) {
                ctx->c6_count--;
                FREE_PATH_STAT_INC(c6_ifl_pop);
                tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_POP, 6,
                                       (uintptr_t)base, ctx->c6_count);
            } else {
                tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_EMPTY, 6,
                                       0, ctx->c6_count);
            }
            return base;
        } else {
            // Fallback: array magazine
            if (likely(ctx->c6_count > 0)) {
                return ctx->c6_freelist[--ctx->c6_count];
            }
            return NULL;
        }

    case 7: {
        // C7 uses separate TinyC7Ultra box (not unified)
        tiny_c7_ultra_tls_t* c7ctx = tiny_c7_ultra_tls_get();
        if (likely(c7ctx->count > 0)) {
            return c7ctx->freelist[--c7ctx->count];
        }
        return NULL;
    }

    default:
        return NULL;
    }
}

// ============================================================================
// Phase TLS-UNIFY-2a: Push to unified TLS (C4-C6) or C7 separate box
// ============================================================================

// Forward declaration for slow path
extern void so_free(int class_idx, void* ptr);

// Slow path: hand the block back to the shared free path via so_free(),
// which expects a USER pointer rather than a BASE pointer.
static void tiny_ultra_tls_push_slow(uint8_t class_idx, void* base) {
    // Convert BASE to USER pointer for so_free
    void* user_ptr = (uint8_t*)base + 1;
    so_free(class_idx, user_ptr);
}

void tiny_ultra_tls_push(uint8_t class_idx, void* base) {
    TinyUltraTlsCtx* ctx = &g_ultra_tls_ctx;
    uintptr_t addr = (uintptr_t)base;

    switch (class_idx) {
    case 4:
        // Learn segment on first C4 free
        if (unlikely(ctx->c4_seg_base == 0)) {
            SuperSlab* ss = ss_fast_lookup(base);
            if (ss != NULL) {
                ctx->c4_seg_base = (uintptr_t)ss;
                ctx->c4_seg_end  = ctx->c4_seg_base + (1u << ss->lg_size);
            }
        }
        // Check segment range and capacity
        if (likely(ctx->c4_seg_base != 0 &&
                   addr >= ctx->c4_seg_base &&
                   addr < ctx->c4_seg_end &&
                   ctx->c4_count < TINY_ULTRA_C4_CAP)) {
            ctx->c4_freelist[ctx->c4_count++] = base;
            FREE_PATH_STAT_INC(c4_ultra_free_fast);
            return;
        }
        tiny_ultra_tls_push_slow(class_idx, base);
        break;

    case 5:
        // Learn segment on first C5 free
        if (unlikely(ctx->c5_seg_base == 0)) {
            SuperSlab* ss = ss_fast_lookup(base);
            if (ss != NULL) {
                ctx->c5_seg_base = (uintptr_t)ss;
                ctx->c5_seg_end  = ctx->c5_seg_base + (1u << ss->lg_size);
            }
        }
        if (likely(ctx->c5_seg_base != 0 &&
                   addr >= ctx->c5_seg_base &&
                   addr < ctx->c5_seg_end &&
                   ctx->c5_count < TINY_ULTRA_C5_CAP)) {
            ctx->c5_freelist[ctx->c5_count++] = base;
            FREE_PATH_STAT_INC(c5_ultra_free_fast);
            return;
        }
        tiny_ultra_tls_push_slow(class_idx, base);
        break;

    case 6:
        // Learn segment on first C6 free (common for both modes)
        if (unlikely(ctx->c6_seg_base == 0)) {
            SuperSlab* ss = ss_fast_lookup(base);
            if (ss != NULL) {
                ctx->c6_seg_base = (uintptr_t)ss;
                ctx->c6_seg_end  = ctx->c6_seg_base + (1u << ss->lg_size);
            }
        }
        // Check segment range and capacity (common)
        if (likely(ctx->c6_seg_base != 0 &&
                   addr >= ctx->c6_seg_base &&
                   addr < ctx->c6_seg_end &&
                   ctx->c6_count < TINY_ULTRA_C6_CAP)) {
            if (tiny_c6_ultra_intrusive_enabled()) {
                // Phase 3: intrusive LIFO
                c6_ifl_push(&ctx->c6_head, base);
                ctx->c6_count++;
                FREE_PATH_STAT_INC(c6_ifl_push);
                FREE_PATH_STAT_INC(c6_ultra_free_fast);
                tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_PUSH, 6,
                                       (uintptr_t)base, ctx->c6_count);
            } else {
                // Fallback: array magazine
                ctx->c6_freelist[ctx->c6_count++] = base;
                FREE_PATH_STAT_INC(c6_ultra_free_fast);
            }
            return;
        }
        // Slow path (out of segment range or cap exceeded)
        if (tiny_c6_ultra_intrusive_enabled()) {
            FREE_PATH_STAT_INC(c6_ifl_fallback);
        }
        tiny_ultra_tls_push_slow(class_idx, base);
        break;

    case 7: {
        // C7 uses separate TinyC7Ultra box (not unified)
        tiny_c7_ultra_tls_t* c7ctx = tiny_c7_ultra_tls_get();
        if (unlikely(c7ctx->seg_base == 0)) {
            SuperSlab* ss = ss_fast_lookup(base);
            if (ss != NULL) {
                c7ctx->seg_base = (uintptr_t)ss;
                c7ctx->seg_end  = c7ctx->seg_base + (1u << ss->lg_size);
            }
        }
        if (likely(c7ctx->seg_base != 0 &&
                   addr >= c7ctx->seg_base &&
                   addr < c7ctx->seg_end &&
                   c7ctx->count < TINY_C7_ULTRA_CAP)) {
            c7ctx->freelist[c7ctx->count++] = base;
            FREE_PATH_STAT_INC(c7_ultra_fast);
            return;
        }
        // Slow path for C7
        void* user_ptr = (uint8_t*)base + 1;
        so_free(7, user_ptr);
        break;
    }

    default:
        break;
    }
}
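For orientation, a hypothetical caller shape for the API above. The wrapper names and the slow_alloc fallback are made up for the example; the header name comes from the Files list, and the BASE/USER convention follows tiny_ultra_tls_push_slow().

```c
#include "tiny_ultra_tls_box.h"   /* assumed to declare tiny_ultra_tls_pop/push */

/* Hypothetical wrappers: try the per-thread cache first, fall back to a
 * slower allocation path only when the cache for class 6 is empty. */
static void* example_c6_alloc(void* (*slow_alloc)(void)) {
    void* base = tiny_ultra_tls_pop(6);   /* BASE pointer, or NULL when empty */
    return base ? base : slow_alloc();
}

static void example_c6_free(void* base) {
    tiny_ultra_tls_push(6, base);         /* expects BASE (= USER - 1) */
}
```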