Phase TLS-UNIFY-3: C6 intrusive freelist implementation (complete)

Implement C6 ULTRA intrusive LIFO freelist with ENV gating:
- Singly linked LIFO; next pointer stored at BASE+1 (the user offset)
- tiny_next_store/tiny_next_load for all next-pointer access (single source of truth; see the sketch after this list)
- Segment learning via ss_fast_lookup (per-class seg_base/seg_end)
- ENV gate: HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL (default OFF)
- Counters: c6_ifl_push/pop/fallback in FREE_PATH_STATS
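
tiny_nextptr.h itself is not part of this diff; the following is a minimal sketch of the contract the tiny_next_* pair is assumed to provide, with the offset and memcpy discipline inferred from the box comments below (not confirmed by this commit):

```c
// Hypothetical sketch of tiny_next_load/tiny_next_store (assumed contract;
// the real tiny_nextptr.h is not shown in this commit).
#include <stdint.h>
#include <string.h>

// Assumption: for class 6 the next pointer lives at BASE+1, the user offset.
static inline void* tiny_next_load(void* base, int class_idx) {
    (void)class_idx;   // off=1 for class 6, per c6_ifl_next_load() below
    void* next;
    memcpy(&next, (uint8_t*)base + 1, sizeof(next)); // memcpy: BASE+1 is unaligned
    return next;
}

static inline void tiny_next_store(void* base, int class_idx, void* next) {
    (void)class_idx;
    memcpy((uint8_t*)base + 1, &next, sizeof(next));
}
```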

Files:
- core/box/tiny_ultra_tls_box.h: Added c6_head field for intrusive LIFO
- core/box/tiny_ultra_tls_box.c: Pop/push with intrusive branching (case 6)
- core/box/tiny_c6_ultra_intrusive_env_box.h: ENV gate (new)
- core/box/tiny_c6_intrusive_freelist_box.h: L1 pure LIFO (new)
- core/tiny_debug_ring.h: C6_IFL events
- core/box/free_path_stats_box.h/c: c6_ifl_* counters

A/B Test Results (1M iterations, ws=200, 257-512B):
- ENV_OFF (array): 56.6 Mop/s avg
- ENV_ON (intrusive): 57.6 Mop/s avg (+1.8%, within noise)
- Counters verified: c6_ifl_push=265890, c6_ifl_pop=265815, fallback=0
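
The benchmark binary is not part of this commit; below is a minimal driver sketch that reproduces the shape of the run (ws=200 working set, 257-512B sizes landing in class 6), assuming hakmem interposes malloc/free and the gate is read once on first allocation:

```c
// Hypothetical A/B driver (assumes hakmem interposes malloc/free).
#include <stdio.h>
#include <stdlib.h>

int main(void) {
    // Must happen before the first allocation: the gate is cached once.
    setenv("HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL", "1", 1);
    enum { WS = 200, ITERS = 1000000 };
    static void* slots[WS];                  // zero-initialized working set
    for (int i = 0; i < ITERS; i++) {
        int s = i % WS;
        free(slots[s]);                      // free(NULL) is a no-op on first pass
        slots[s] = malloc(257 + (i % 256));  // 257..512B -> class 6
    }
    for (int s = 0; s < WS; s++) free(slots[s]);
    puts("done");
    return 0;
}
```

Run once with the variable set to 1 and once unset to reproduce the ENV_ON/ENV_OFF rows.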

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
Author: Moe Charm (CI)
Date: 2025-12-12 16:26:42 +09:00
parent bf83612b97
commit 1a8652a91a
18 changed files with 1268 additions and 217 deletions

core/box/free_path_stats_box.c

@@ -44,5 +44,11 @@ static void free_path_stats_dump(void) {
g_free_path_stats.legacy_by_class[6],
g_free_path_stats.legacy_by_class[7]);
// Phase TLS-UNIFY-3: C6 Intrusive Freelist stats
fprintf(stderr, "[FREE_PATH_STATS_C6_IFL] push=%lu pop=%lu fallback=%lu\n",
g_free_path_stats.c6_ifl_push,
g_free_path_stats.c6_ifl_pop,
g_free_path_stats.c6_ifl_fallback);
fflush(stderr);
}
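
For reference, with the counters verified in the commit message, the added dump line renders as:

```
[FREE_PATH_STATS_C6_IFL] push=265890 pop=265815 fallback=0
```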

core/box/free_path_stats_box.h

@@ -11,6 +11,9 @@ typedef struct FreePathStats {
uint64_t c7_ultra_fast;
uint64_t c6_ultra_free_fast; // Phase 4-2: C6 ULTRA-free
uint64_t c6_ultra_alloc_hit; // Phase 4-4: C6 ULTRA-alloc (TLS pop)
uint64_t c6_ifl_push; // Phase TLS-UNIFY-3: C6 intrusive push
uint64_t c6_ifl_pop; // Phase TLS-UNIFY-3: C6 intrusive pop
uint64_t c6_ifl_fallback; // Phase TLS-UNIFY-3: C6 intrusive fallback (slow)
uint64_t c5_ultra_free_fast; // Phase 5-1: C5 ULTRA-free
uint64_t c5_ultra_alloc_hit; // Phase 5-2: C5 ULTRA-alloc (TLS pop)
uint64_t c4_ultra_free_fast; // Phase 6: C4 ULTRA-free (cap=64)

core/box/tiny_c6_intrusive_freelist_box.h (new)

@@ -0,0 +1,57 @@
// tiny_c6_intrusive_freelist_box.h - Phase TLS-UNIFY-3: C6 Intrusive Freelist L1 Box
//
// Pure LIFO operations on intrusive freelist (header-only / static inline).
// No side effects: does NOT touch seg/owner/remote/publish/stats.
//
// IMPORTANT: All next pointer access MUST go through tiny_next_* (no direct *(void**))
//
#ifndef HAKMEM_TINY_C6_INTRUSIVE_FREELIST_BOX_H
#define HAKMEM_TINY_C6_INTRUSIVE_FREELIST_BOX_H
#include <stdbool.h>
#include <stddef.h>
#include "../tiny_nextptr.h"
// ============================================================================
// C6 Fixed Wrappers (delegate to tiny_next_* "single source of truth")
// ============================================================================
// Load next pointer from freed block (at user offset = base+1)
static inline void* c6_ifl_next_load(void* base) {
return tiny_next_load(base, 6); // class_idx=6, off=1
}
// Store next pointer to freed block (at user offset = base+1)
static inline void c6_ifl_next_store(void* base, void* next) {
tiny_next_store(base, 6, next); // class_idx=6, off=1
}
// ============================================================================
// Pure LIFO Operations (no side effects)
// ============================================================================
// Push base to intrusive LIFO head
// Caller is responsible for count update and stats
static inline void c6_ifl_push(void** head, void* base) {
c6_ifl_next_store(base, *head);
*head = base;
}
// Pop from intrusive LIFO head
// Returns NULL if empty
// Caller is responsible for count update and stats
static inline void* c6_ifl_pop(void** head) {
void* base = *head;
if (base == NULL) {
return NULL;
}
*head = c6_ifl_next_load(base);
return base;
}
// Check if LIFO is empty
static inline bool c6_ifl_is_empty(void* head) {
return head == NULL;
}
#endif // HAKMEM_TINY_C6_INTRUSIVE_FREELIST_BOX_H
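
A standalone usage sketch of the L1 box (in-tree include path assumed; the blocks come from plain malloc purely to have writable memory, not from the allocator's class-6 path):

```c
// Usage sketch: pure LIFO semantics of the C6 intrusive freelist box.
#include <assert.h>
#include <stdlib.h>
#include "core/box/tiny_c6_intrusive_freelist_box.h" // path assumed

static void c6_ifl_demo(void) {
    void* head = NULL;                  // empty LIFO
    void* a = malloc(384);              // class-6-sized blocks (257-512B)
    void* b = malloc(384);

    c6_ifl_push(&head, a);              // head -> a
    c6_ifl_push(&head, b);              // head -> b -> a
    assert(!c6_ifl_is_empty(head));

    assert(c6_ifl_pop(&head) == b);     // LIFO: last pushed, first popped
    assert(c6_ifl_pop(&head) == a);
    assert(c6_ifl_pop(&head) == NULL);  // empty -> NULL
    assert(c6_ifl_is_empty(head));

    free(a);
    free(b);
}
```

Note that the box itself never touches counts or stats; the caller (tiny_ultra_tls_box.c below) owns both, which keeps the L1 box side-effect free.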

core/box/tiny_c6_ultra_intrusive_env_box.h (new)

@@ -0,0 +1,22 @@
// tiny_c6_ultra_intrusive_env_box.h - Phase TLS-UNIFY-3: C6 Intrusive FL ENV gate
//
// ENV: HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL (default OFF)
// Separate from existing HAKMEM_TINY_C6_ULTRA_FREE_ENABLED
//
#ifndef HAKMEM_TINY_C6_ULTRA_INTRUSIVE_ENV_BOX_H
#define HAKMEM_TINY_C6_ULTRA_INTRUSIVE_ENV_BOX_H
#include <stdlib.h>
#include <stdbool.h>
// Cached ENV gate (read once on first call)
static inline bool tiny_c6_ultra_intrusive_enabled(void) {
static int g_enabled = -1; // -1 = not initialized
if (g_enabled < 0) {
const char* env = getenv("HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL");
g_enabled = (env && env[0] == '1') ? 1 : 0;
}
return g_enabled == 1;
}
#endif // HAKMEM_TINY_C6_ULTRA_INTRUSIVE_ENV_BOX_H
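
One behavioral consequence worth noting: because the gate is cached on first call, flipping the variable later in the process has no effect. A small sketch of that caveat (in-tree include path assumed):

```c
// Caveat sketch: the ENV gate is latched by the first call.
#include <assert.h>
#include <stdlib.h>
#include "core/box/tiny_c6_ultra_intrusive_env_box.h" // path assumed

int main(void) {
    setenv("HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL", "1", 1);
    assert(tiny_c6_ultra_intrusive_enabled());    // first call caches 1
    setenv("HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL", "0", 1);
    assert(tiny_c6_ultra_intrusive_enabled());    // still true: cached
    return 0;
}
```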

core/box/tiny_ultra_tls_box.c

@@ -0,0 +1,211 @@
// tiny_ultra_tls_box.c - Phase TLS-UNIFY-2a + TLS-UNIFY-3: Unified ULTRA TLS implementation
//
// Phase 1: Thin wrapper delegating to per-class TLS (completed)
// Phase 2a: Unified struct with array magazines for C4-C6 (completed)
// C7 remains in separate TinyC7Ultra box.
// Phase 3: C6 intrusive LIFO (current) - ENV gated
//
#include "tiny_ultra_tls_box.h"
#include "tiny_c7_ultra_box.h"
#include "free_path_stats_box.h"
#include "tiny_c6_ultra_intrusive_env_box.h" // Phase 3: ENV gate
#include "tiny_c6_intrusive_freelist_box.h" // Phase 3: L1 box
#include "../superslab/superslab_inline.h" // For ss_fast_lookup
#include "../tiny_debug_ring.h" // For ring visualization
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// ============================================================================
// Phase TLS-UNIFY-2a: Unified TLS context for C4-C6
// ============================================================================
static __thread TinyUltraTlsCtx g_ultra_tls_ctx = {0};
TinyUltraTlsCtx* tiny_ultra_tls_ctx(void) {
return &g_ultra_tls_ctx;
}
// ============================================================================
// Phase TLS-UNIFY-2a: Pop from unified TLS (C4-C6) or C7 separate box
// ============================================================================
void* tiny_ultra_tls_pop(uint8_t class_idx) {
TinyUltraTlsCtx* ctx = &g_ultra_tls_ctx;
switch (class_idx) {
case 4:
if (likely(ctx->c4_count > 0)) {
return ctx->c4_freelist[--ctx->c4_count];
}
return NULL;
case 5:
if (likely(ctx->c5_count > 0)) {
return ctx->c5_freelist[--ctx->c5_count];
}
return NULL;
case 6:
if (tiny_c6_ultra_intrusive_enabled()) {
// Phase 3: intrusive LIFO
void* base = c6_ifl_pop(&ctx->c6_head);
if (base) {
ctx->c6_count--;
FREE_PATH_STAT_INC(c6_ifl_pop);
tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_POP, 6,
(uintptr_t)base, ctx->c6_count);
} else {
tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_EMPTY, 6,
0, ctx->c6_count);
}
return base;
} else {
// Fallback: array magazine
if (likely(ctx->c6_count > 0)) {
return ctx->c6_freelist[--ctx->c6_count];
}
return NULL;
}
case 7: {
// C7 uses separate TinyC7Ultra box (not unified)
tiny_c7_ultra_tls_t* c7ctx = tiny_c7_ultra_tls_get();
if (likely(c7ctx->count > 0)) {
return c7ctx->freelist[--c7ctx->count];
}
return NULL;
}
default:
return NULL;
}
}
// ============================================================================
// Phase TLS-UNIFY-2a: Push to unified TLS (C4-C6) or C7 separate box
// ============================================================================
// Forward declaration for slow path
extern void so_free(int class_idx, void* ptr);
// Slow path: hand one block back via so_free (BASE -> USER conversion)
static void tiny_ultra_tls_push_slow(uint8_t class_idx, void* base) {
// Convert BASE to USER pointer for so_free
void* user_ptr = (uint8_t*)base + 1;
so_free(class_idx, user_ptr);
}
void tiny_ultra_tls_push(uint8_t class_idx, void* base) {
TinyUltraTlsCtx* ctx = &g_ultra_tls_ctx;
uintptr_t addr = (uintptr_t)base;
switch (class_idx) {
case 4:
// Learn segment on first C4 free
if (unlikely(ctx->c4_seg_base == 0)) {
SuperSlab* ss = ss_fast_lookup(base);
if (ss != NULL) {
ctx->c4_seg_base = (uintptr_t)ss;
ctx->c4_seg_end = ctx->c4_seg_base + (1u << ss->lg_size);
}
}
// Check segment range and capacity
if (likely(ctx->c4_seg_base != 0 &&
addr >= ctx->c4_seg_base &&
addr < ctx->c4_seg_end &&
ctx->c4_count < TINY_ULTRA_C4_CAP)) {
ctx->c4_freelist[ctx->c4_count++] = base;
FREE_PATH_STAT_INC(c4_ultra_free_fast);
return;
}
tiny_ultra_tls_push_slow(class_idx, base);
break;
case 5:
// Learn segment on first C5 free
if (unlikely(ctx->c5_seg_base == 0)) {
SuperSlab* ss = ss_fast_lookup(base);
if (ss != NULL) {
ctx->c5_seg_base = (uintptr_t)ss;
ctx->c5_seg_end = ctx->c5_seg_base + (1u << ss->lg_size);
}
}
if (likely(ctx->c5_seg_base != 0 &&
addr >= ctx->c5_seg_base &&
addr < ctx->c5_seg_end &&
ctx->c5_count < TINY_ULTRA_C5_CAP)) {
ctx->c5_freelist[ctx->c5_count++] = base;
FREE_PATH_STAT_INC(c5_ultra_free_fast);
return;
}
tiny_ultra_tls_push_slow(class_idx, base);
break;
case 6:
// Learn segment on first C6 free (common for both modes)
if (unlikely(ctx->c6_seg_base == 0)) {
SuperSlab* ss = ss_fast_lookup(base);
if (ss != NULL) {
ctx->c6_seg_base = (uintptr_t)ss;
ctx->c6_seg_end = ctx->c6_seg_base + (1u << ss->lg_size);
}
}
// Check segment range and capacity (common)
if (likely(ctx->c6_seg_base != 0 &&
addr >= ctx->c6_seg_base &&
addr < ctx->c6_seg_end &&
ctx->c6_count < TINY_ULTRA_C6_CAP)) {
if (tiny_c6_ultra_intrusive_enabled()) {
// Phase 3: intrusive LIFO
c6_ifl_push(&ctx->c6_head, base);
ctx->c6_count++;
FREE_PATH_STAT_INC(c6_ifl_push);
FREE_PATH_STAT_INC(c6_ultra_free_fast);
tiny_debug_ring_record(TINY_RING_EVENT_C6_IFL_PUSH, 6,
(uintptr_t)base, ctx->c6_count);
} else {
// Fallback: array magazine
ctx->c6_freelist[ctx->c6_count++] = base;
FREE_PATH_STAT_INC(c6_ultra_free_fast);
}
return;
}
// Slow path (range out or cap exceeded)
if (tiny_c6_ultra_intrusive_enabled()) {
FREE_PATH_STAT_INC(c6_ifl_fallback);
}
tiny_ultra_tls_push_slow(class_idx, base);
break;
case 7: {
// C7 uses separate TinyC7Ultra box (not unified)
tiny_c7_ultra_tls_t* c7ctx = tiny_c7_ultra_tls_get();
if (unlikely(c7ctx->seg_base == 0)) {
SuperSlab* ss = ss_fast_lookup(base);
if (ss != NULL) {
c7ctx->seg_base = (uintptr_t)ss;
c7ctx->seg_end = c7ctx->seg_base + (1u << ss->lg_size);
}
}
if (likely(c7ctx->seg_base != 0 &&
addr >= c7ctx->seg_base &&
addr < c7ctx->seg_end &&
c7ctx->count < TINY_C7_ULTRA_CAP)) {
c7ctx->freelist[c7ctx->count++] = base;
FREE_PATH_STAT_INC(c7_ultra_fast);
return;
}
// Slow path for C7
void* user_ptr = (uint8_t*)base + 1;
so_free(7, user_ptr);
break;
}
default:
break;
}
}
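
The C4-C7 push paths all repeat the same learn-once/range-check idiom. Distilled into standalone form (SegRange and the helper names are illustrative, not from the tree):

```c
// Distilled "learn segment once, then two-compare ownership test" pattern.
#include <stdbool.h>
#include <stdint.h>

typedef struct { uintptr_t seg_base, seg_end; } SegRange;

// Learn the segment range from the first observed block's SuperSlab.
static inline void seg_range_learn(SegRange* r, uintptr_t ss_base, unsigned lg_size) {
    r->seg_base = ss_base;
    r->seg_end  = ss_base + ((uintptr_t)1 << lg_size);
}

// Hot path: two compares decide fast-path eligibility, no lookup needed.
static inline bool seg_range_hit(const SegRange* r, const void* p) {
    uintptr_t a = (uintptr_t)p;
    return r->seg_base != 0 && a >= r->seg_base && a < r->seg_end;
}
```

Blocks that fail the range check (or exceed the cap) fall through to so_free(), which is what the c6_ifl_fallback counter measures in intrusive mode.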

core/box/tiny_ultra_tls_box.h

@@ -0,0 +1,78 @@
// tiny_ultra_tls_box.h - Phase TLS-UNIFY-1: Unified ULTRA TLS API
//
// Goal: Single API for C4-C7 ULTRA TLS operations
// Phase 1: Thin wrapper delegating to existing TinyC*UltraFreeTLS
// Phase 2: Replace with unified struct (1 cache line hot path)
//
#ifndef HAKMEM_TINY_ULTRA_TLS_BOX_H
#define HAKMEM_TINY_ULTRA_TLS_BOX_H
#include <stdint.h>
#include <stdbool.h>
// ============================================================================
// TinyUltraTlsCtx - Unified TLS context (Phase TLS-UNIFY-2a + TLS-UNIFY-3)
// ============================================================================
//
// Phase 1: Thin wrapper delegating to per-class TLS (completed)
// Phase 2a: Unified struct with array magazines for C4-C6 (completed)
// C7 remains in separate TinyC7Ultra box.
// Phase 3: C6 intrusive LIFO (current) - ENV gated
//
// Capacity constants
#define TINY_ULTRA_C4_CAP 64
#define TINY_ULTRA_C5_CAP 64
#define TINY_ULTRA_C6_CAP 128
typedef struct TinyUltraTlsCtx {
// Hot line: counts (8B aligned)
uint16_t c4_count;
uint16_t c5_count;
uint16_t c6_count;
uint16_t _pad_count;
// C6 intrusive LIFO head (Phase TLS-UNIFY-3)
// Used when HAKMEM_TINY_C6_ULTRA_INTRUSIVE_FL=1
void* c6_head;
// Per-class segment ranges (learned on first free)
uintptr_t c4_seg_base;
uintptr_t c4_seg_end;
uintptr_t c5_seg_base;
uintptr_t c5_seg_end;
uintptr_t c6_seg_base;
uintptr_t c6_seg_end;
// Per-class array magazines (C4/C5 always, C6 when intrusive OFF)
void* c4_freelist[TINY_ULTRA_C4_CAP]; // 512B
void* c5_freelist[TINY_ULTRA_C5_CAP]; // 512B
void* c6_freelist[TINY_ULTRA_C6_CAP]; // 1024B (kept for ENV_OFF fallback)
// Total: ~2KB per thread (acceptable for array magazine design)
// Note: C7 is NOT included here - uses separate TinyC7Ultra box
} TinyUltraTlsCtx;
// ============================================================================
// Unified API
// ============================================================================
// Get TLS context (Phase 1: returns dummy, Phase 2: returns actual unified ctx)
TinyUltraTlsCtx* tiny_ultra_tls_ctx(void);
// Pop BASE pointer from TLS freelist (C4-C7)
// Returns: BASE pointer on hit, NULL on miss (caller should fallback)
// class_idx: 4, 5, 6, or 7
void* tiny_ultra_tls_pop(uint8_t class_idx);
// Push BASE pointer to TLS freelist (C4-C7)
// class_idx: 4, 5, 6, or 7
// base: BASE pointer (not user pointer)
void tiny_ultra_tls_push(uint8_t class_idx, void* base);
// Check if unified TLS is enabled (ENV gate)
static inline int tiny_ultra_tls_unified_enabled(void) {
// Phase 1: Always enabled (thin wrapper mode)
return 1;
}
#endif // HAKMEM_TINY_ULTRA_TLS_BOX_H
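
A quick size check for the "~2KB per thread" note in the struct comment (LP64 assumption: 8-byte pointers; the four uint16_t counts pack into 8 bytes, so no hidden padding is expected):

```c
// Size sanity sketch for TinyUltraTlsCtx (LP64 assumption).
#include <stdio.h>
#include "core/box/tiny_ultra_tls_box.h" // path assumed

int main(void) {
    // Expected: 8 (counts) + 8 (c6_head) + 48 (seg ranges)
    //         + (64 + 64 + 128) * 8 = 2048 (magazines) -> 2112 bytes total.
    printf("sizeof(TinyUltraTlsCtx) = %zu\n", sizeof(TinyUltraTlsCtx));
    return 0;
}
```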