// ============================================================================
// Box TLS-SLL API
// ============================================================================

#include "box/tls_sll_box.h"
#include "front/tiny_heap_v2.h"

// Optional: track alloc->class routing for sizes near 1KB
// (env: HAKMEM_TINY_ALLOC_1024_METRIC)
extern _Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES];

static inline void tiny_diag_track_size_ge1024(size_t req_size, int class_idx) {
    if (__builtin_expect(req_size < 1024, 1)) return;

    static int s_metric_en = -1;
    if (__builtin_expect(s_metric_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_ALLOC_1024_METRIC");
        s_metric_en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (__builtin_expect(!s_metric_en, 1)) return;

    if (__builtin_expect(class_idx >= 0 && class_idx < TINY_NUM_CLASSES, 1)) {
        atomic_fetch_add_explicit(&g_tiny_alloc_ge1024[class_idx], 1, memory_order_relaxed);
    } else {
        // Log the first out-of-range class index only; count the rest silently.
        static _Atomic int g_metric_bad_class_once = 0;
        if (atomic_fetch_add_explicit(&g_metric_bad_class_once, 1, memory_order_relaxed) == 0) {
            fprintf(stderr, "[ALLOC_1024_METRIC] bad class_idx=%d size=%zu\n", class_idx, req_size);
        }
    }
}
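
// Usage sketch (illustrative): run with
//   HAKMEM_TINY_ALLOC_1024_METRIC=1 ./app
// and every tiny allocation with req_size >= 1024 bumps
// g_tiny_alloc_ge1024[class_idx] (relaxed atomics, diagnostics only; how the
// counters are reported is up to the embedding stats hooks).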

// ============================================================================
// Step 3: Cold-path outline - Wrapper Context Handler
// ============================================================================
// Purpose: handle allocations made during wrapper calls (rarely executed).
// Rationale: avoid re-entrancy hazards with pthread locks during wrapper calls.
// Step 3d: force-inline so the outline stays readable without a performance
// penalty.
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx) {
    ROUTE_BEGIN(class_idx);
    // Wrapper-context fast path: magazine-only (never take locks or refill)
    tiny_small_mags_init_once();
    if (__builtin_expect(class_idx > 3, 0)) tiny_mag_init_if_needed(class_idx);
    TinyTLSMag* mag = &g_tls_mags[class_idx];
    if (mag->top > 0) {
        void* p = mag->items[--mag->top].ptr;
        HAK_RET_ALLOC(class_idx, p);
    }

    // Try TLS active slabs (owner-only, lock-free)
    TinySlab* tls = g_tls_active_slab_a[class_idx];
    if (!(tls && tls->free_count > 0)) tls = g_tls_active_slab_b[class_idx];
    if (tls && tls->free_count > 0) {
        tiny_remote_drain_owner(tls);
        if (tls->free_count > 0) {
            int block_idx = hak_tiny_find_free_block(tls);
            if (block_idx >= 0) {
                hak_tiny_set_used(tls, block_idx);
                tls->free_count--;
                size_t bs = g_tiny_class_sizes[class_idx];
                void* p = (char*)tls->base + (block_idx * bs);
                HAK_RET_ALLOC(class_idx, p);
            }
        }
    }

    // Optional: attempt limited refill under trylock (no remote drain)
    if (g_wrap_tiny_refill) {
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        if (pthread_mutex_trylock(lock) == 0) {
            TinySlab* slab = g_tiny_pool.free_slabs[class_idx];
            if (slab && slab->free_count > 0) {
                int room = mag->cap - mag->top;
                if (room > 16) room = 16; // wrapper refill is small and quick
                if (room > slab->free_count) room = slab->free_count;
                if (room > 0) {
                    size_t bs = g_tiny_class_sizes[class_idx];
                    void* ret = NULL;
                    for (int i = 0; i < room; i++) {
                        int idx = hak_tiny_find_free_block(slab);
                        if (idx < 0) break;
                        hak_tiny_set_used(slab, idx);
                        slab->free_count--;
                        void* p = (char*)slab->base + (idx * bs);
                        if (i < room - 1) {
                            mag->items[mag->top].ptr = p;
                            mag->top++;
                        } else {
                            ret = p; // return the last block directly
                        }
                    }
                    if (slab->free_count == 0) {
                        move_to_full_list(class_idx, slab);
                    }
                    pthread_mutex_unlock(lock);
                    if (ret) { HAK_RET_ALLOC(class_idx, ret); }
                } else {
                    pthread_mutex_unlock(lock);
                }
            } else {
                pthread_mutex_unlock(lock);
            }
        }
    }

    return NULL; // empty → fall back to the next allocator tier
}
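
// Tier ordering above (a summary of the code, not a separate contract): the
// wrapper path tries (1) the TLS magazine, (2) the thread's active slabs after
// an owner-side remote drain, then (3) the opt-in trylock refill. It never
// blocks; a contended lock simply falls through to the NULL return so the
// caller can use the next allocator tier.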

void* hak_tiny_alloc(size_t size) {
#if !HAKMEM_BUILD_RELEASE
    if (!g_tiny_initialized) hak_tiny_init();
#else
    if (__builtin_expect(!g_tiny_initialized, 0)) {
        hak_tiny_init();
    }
#endif

    // Default (safe): avoid using Tiny during wrapper calls (TLS guard or
    // function check). If HAKMEM_WRAP_TINY=1, allow Tiny even when called
    // from a wrapper.
#if !HAKMEM_BUILD_RELEASE
# if HAKMEM_WRAPPER_TLS_GUARD
    if (!g_wrap_tiny_enabled && __builtin_expect(g_tls_in_wrapper != 0, 0)) {
        static int log1 = 0;
        if (log1 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: in_wrapper\n"); log1++; }
        return NULL;
    }
# else
    extern int hak_in_wrapper(void);
    if (!g_wrap_tiny_enabled && __builtin_expect(hak_in_wrapper() != 0, 0)) {
        static int log2 = 0;
        if (log2 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: hak_in_wrapper\n"); log2++; }
        return NULL;
    }
# endif
#endif
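    // Usage sketch (assumption: HAKMEM_WRAP_TINY is parsed into
    // g_wrap_tiny_enabled during init, as the comment above implies):
    //   HAKMEM_WRAP_TINY=1 ./app   # allow Tiny inside wrapper context
    // Leaving it unset keeps the guard active in non-release builds.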

    // ========================================================================
    // Cooperative stats polling (SIGUSR1 trigger safe point)
    hak_tiny_stats_poll();

    // ========================================================================
    // Phase 6-1.5: Ultra-Simple Fast Path (when enabled)
    // ========================================================================
    // Design: "Simple Front + Smart Back" - inspired by Mid-Large HAKX +171%
    //   - 3-4 instruction fast path (Phase 6-1 style)
    //   - Existing SuperSlab + ACE + Learning backend
    // Two variants:
    //   Phase 6-1.5: -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1 (alignment guessing)
    //   Phase 6-1.6: -DHAKMEM_TINY_PHASE6_METADATA=1 (metadata header)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
    return hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
    return hak_tiny_alloc_metadata(size);
#endif

    // ========================================================================

    // 1. Size → class index
    int class_idx = hak_tiny_size_to_class(size);
    if (class_idx < 0) {
        static int log3 = 0;
        if (log3 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: class_idx < 0 for size %zu\n", size); log3++; }
        return NULL; // >1KB
    }
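    // Mapping sketch (illustrative): hak_tiny_size_to_class() buckets the
    // request into one of TINY_NUM_CLASSES size classes; anything above the
    // 1KB tiny ceiling yields a negative index and is routed to larger tiers.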

    // Per-call success epilogue: record the >=1KB routing metric, then
    // return through the standard alloc epilogue.
#define HAK_RET_ALLOC_WITH_METRIC(ptr) do { \
    tiny_diag_track_size_ge1024(size, class_idx); \
    HAK_RET_ALLOC(class_idx, (ptr)); \
} while (0)
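    // The do { ... } while (0) wrapper keeps the macro statement-safe in
    // if/else chains. Note that it deliberately captures `size` and
    // `class_idx` from the enclosing scope, so it is only meaningful inside
    // hak_tiny_alloc(); hence the #undef at the end of this file.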

    // Route fingerprint begin (debug-only; no-op unless HAKMEM_ROUTE=1)
    ROUTE_BEGIN(class_idx);
    do {
        static int g_alloc_ring = -1;
        if (__builtin_expect(g_alloc_ring == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_ALLOC_RING");
            g_alloc_ring = (e && *e && *e != '0') ? 1 : 0;
        }
        if (g_alloc_ring) {
            tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, (uint16_t)class_idx, (void*)(uintptr_t)size, 0);
        }
    } while (0);

    // Phase 13-A/B: Tiny Heap v2 front (tcache-like, A/B).
    // Phase 13-A Step 1 made TinyHeapV2 a passive NO-REFILL L0 cache: it pops
    // from its per-class magazine on a lucky hit and otherwise returns NULL,
    // never refilling itself. Self-refill from FastCache previously created a
    // circular dependency (FastCache starved: 0% hit rate, ~99% slowdown).
    // ENV: HAKMEM_TINY_HEAP_V2=1, HAKMEM_TINY_HEAP_V2_CLASS_MASK,
    //      HAKMEM_TINY_HEAP_V2_STATS=1.
    if (__builtin_expect(tiny_heap_v2_enabled() && front_prune_heapv2_enabled() && class_idx <= 3, 0)) {
        void* base = tiny_heap_v2_alloc_by_class(class_idx);
        if (base) {
            front_metrics_heapv2_hit(class_idx);
            HAK_RET_ALLOC_WITH_METRIC(base); // header write + return USER pointer
        } else {
            front_metrics_heapv2_miss(class_idx);
        }
        // Fall through to the existing front path if HeapV2 misses
    }
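    // A/B context from the Phase 13-A Step 1 measurements (fixed-size bench,
    // 100K iterations): enabling HeapV2 shifted C1-C3 throughput by +0.76%,
    // +0.42% and -0.26% respectively, all within noise, i.e. the passive L0
    // cache costs nothing while its magazine remains unsupplied (Step 2 is to
    // feed it leftover blocks from the existing pipeline).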

#if HAKMEM_TINY_MINIMAL_FRONT
    // Minimal front for hot tiny classes (bench-focused):
    // SLL direct pop → minimal refill → pop, bypassing other layers.
    if (__builtin_expect(class_idx <= 3, 1)) {
        void* head = NULL;
        if (tls_sll_pop(class_idx, &head)) {
            HAK_RET_ALLOC_WITH_METRIC(head);
        }
        // Refill a small batch directly from the TLS-cached SuperSlab
#if HAKMEM_TINY_P0_BATCH_REFILL
        (void)sll_refill_batch_from_ss(class_idx, 32);
#else
        (void)sll_refill_small_from_ss(class_idx, 32);
#endif
        if (tls_sll_pop(class_idx, &head)) {
            HAK_RET_ALLOC_WITH_METRIC(head);
        }
        // Fall through to the slow path if still empty
    }
#endif
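    // Design note: the minimal front trades generality for instruction count.
    // The batch of 32 per refill is a bench-tuned constant rather than a
    // documented contract, so treat it as an assumption when retuning.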

    // Ultra-Front - REMOVED (dead code cleanup 2025-11-27). Ultra HEAP never
    // compiled (#if HAKMEM_TINY_ULTRA_HEAP was undefined) and BG Remote was
    // never enabled; Ultra SLIM (ultra_slim_alloc_box.h) remains the active
    // 4-layer path.

    if (__builtin_expect(!g_debug_fast0, 1)) {
#ifdef HAKMEM_TINY_BENCH_FASTPATH
        if (__builtin_expect(class_idx <= HAKMEM_TINY_BENCH_TINY_CLASSES, 1)) {
            // One-time per-thread warmup: prefill the SLL so steady-state
            // benchmark iterations stay on the pop path.
            if (__builtin_expect(class_idx <= 3, 1)) {
                unsigned char* done = &g_tls_bench_warm_done[class_idx];
                if (__builtin_expect(*done == 0, 0)) {
                    int warm = (class_idx == 0) ? HAKMEM_TINY_BENCH_WARMUP8 :
                               (class_idx == 1) ? HAKMEM_TINY_BENCH_WARMUP16 :
                               (class_idx == 2) ? HAKMEM_TINY_BENCH_WARMUP32 :
                                                  HAKMEM_TINY_BENCH_WARMUP64;
#if HAKMEM_TINY_P0_BATCH_REFILL
                    if (warm > 0) (void)sll_refill_batch_from_ss(class_idx, warm);
#else
                    if (warm > 0) (void)sll_refill_small_from_ss(class_idx, warm);
#endif
                    *done = 1;
                }
            }
#ifndef HAKMEM_TINY_BENCH_SLL_ONLY
            tiny_small_mags_init_once();
            if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
#endif
            void* head = NULL;
            if (tls_sll_pop(class_idx, &head)) {
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, head, 0);
                HAK_RET_ALLOC_WITH_METRIC(head);
            }
#ifndef HAKMEM_TINY_BENCH_SLL_ONLY
            TinyTLSMag* mag = &g_tls_mags[class_idx];
            int t = mag->top;
            if (__builtin_expect(t > 0, 1)) {
                void* p = mag->items[--t].ptr;
                mag->top = t;
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, p, 1);
                HAK_RET_ALLOC_WITH_METRIC(p);
            }
#endif
            int bench_refill = (class_idx == 0) ? HAKMEM_TINY_BENCH_REFILL8 :
                               (class_idx == 1) ? HAKMEM_TINY_BENCH_REFILL16 :
                               (class_idx == 2) ? HAKMEM_TINY_BENCH_REFILL32 :
                                                  HAKMEM_TINY_BENCH_REFILL64;
#if HAKMEM_TINY_P0_BATCH_REFILL
            if (__builtin_expect(sll_refill_batch_from_ss(class_idx, bench_refill) > 0, 0)) {
#else
            if (__builtin_expect(sll_refill_small_from_ss(class_idx, bench_refill) > 0, 0)) {
#endif
                if (tls_sll_pop(class_idx, &head)) {
                    tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, head, 2);
                    HAK_RET_ALLOC_WITH_METRIC(head);
                }
            }
            // Fall through to the slow path on miss
        }
#endif

        // TinyHotMag front: when the fast tier is exhausted, refill the hot
        // cache before using it.
        if (__builtin_expect(g_hotmag_enable && class_idx <= 2 && g_fast_head[class_idx] == NULL, 0)) {
            hotmag_init_if_needed(class_idx);
            TinyHotMag* hm = &g_tls_hot_mag[class_idx];
            void* hotmag_ptr = hotmag_pop(class_idx);
            if (__builtin_expect(hotmag_ptr == NULL, 0)) {
                if (hotmag_try_refill(class_idx, hm) > 0) {
                    hotmag_ptr = hotmag_pop(class_idx);
                }
            }
            if (__builtin_expect(hotmag_ptr != NULL, 1)) {
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, hotmag_ptr, 3);
                HAK_RET_ALLOC_WITH_METRIC(hotmag_ptr);
            }
        }
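        // Note: the HotMag front covers only the smallest classes
        // (class_idx <= 2) and engages only when the fast-tier head is empty,
        // keeping its refill cost off the common path.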

        if (g_hot_alloc_fn[class_idx] != NULL) {
            void* fast_hot = NULL;
            switch (class_idx) {
                case 0: fast_hot = tiny_hot_pop_class0(); break;
                case 1: fast_hot = tiny_hot_pop_class1(); break;
                case 2: fast_hot = tiny_hot_pop_class2(); break;
                case 3: fast_hot = tiny_hot_pop_class3(); break;
                default: fast_hot = NULL; break;
            }
            if (__builtin_expect(fast_hot != NULL, 1)) {
#if HAKMEM_BUILD_DEBUG
                g_tls_hit_count[class_idx]++;
#endif
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, fast_hot, 4);
                HAK_RET_ALLOC_WITH_METRIC(fast_hot);
            }
        }
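        // Design note: g_hot_alloc_fn[] serves here as an enable flag; the pop
        // itself goes through the per-class tiny_hot_pop_classN()
        // specializations via the switch, presumably so the compiler can
        // inline them instead of emitting an indirect call.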

        void* fast = tiny_fast_pop(class_idx);
        if (__builtin_expect(fast != NULL, 0)) {
#if HAKMEM_BUILD_DEBUG
            g_tls_hit_count[class_idx]++;
#endif
            tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, fast, 5);
            HAK_RET_ALLOC_WITH_METRIC(fast);
        }
    } else {
        tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, NULL, 0);
    }

    void* slow_ptr = hak_tiny_alloc_slow(size, class_idx);
    if (slow_ptr) {
        tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, slow_ptr, 6);
        HAK_RET_ALLOC_WITH_METRIC(slow_ptr); // increment stats on slow-path success
    }
#if !HAKMEM_BUILD_RELEASE
    tiny_alloc_dump_tls_state(class_idx, "fail", &g_tls_slabs[class_idx]);
#endif
    tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_NULL, (uint16_t)class_idx, NULL, 0);
    return slow_ptr;
}
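
// Contract note: a NULL return from hak_tiny_alloc() is not by itself an OOM
// signal; the guards above and the wrapper path use NULL to mean "Tiny
// declined", and the caller is expected to fall back to the next tier.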
#undef HAK_RET_ALLOC_WITH_METRIC
|