hakmem/core/hakmem_tiny_alloc.inc
Moe Charm (CI) 6b791b97d4 ENV Cleanup: Delete Ultra HEAP & BG Remote dead code (-1,096 LOC)
Deleted files (11):
- core/ultra/ directory (6 files: tiny_ultra_heap.*, tiny_ultra_page_arena.*)
- core/front/tiny_ultrafront.h
- core/tiny_ultra_fast.inc.h
- core/hakmem_tiny_ultra_front.inc.h
- core/hakmem_tiny_ultra_simple.inc
- core/hakmem_tiny_ultra_batch_box.inc

Edited files (10):
- core/hakmem_tiny.c: Remove Ultra HEAP #includes, move ultra_batch_for_class()
- core/hakmem_tiny_tls_state_box.inc: Delete TinyUltraFront, g_ultra_simple
- core/hakmem_tiny_phase6_wrappers_box.inc: Delete ULTRA_SIMPLE block
- core/hakmem_tiny_alloc.inc: Delete Ultra-Front code block
- core/hakmem_tiny_init.inc: Delete ULTRA_SIMPLE ENV loading
- core/hakmem_tiny_remote_target.{c,h}: Delete g_bg_remote_enable/batch
- core/tiny_refill.h: Remove BG Remote check (always break)
- core/hakmem_tiny_background.inc: Delete BG Remote drain loop

Deleted ENV variables:
- HAKMEM_TINY_ULTRA_HEAP (build flag, undefined)
- HAKMEM_TINY_ULTRA_L0
- HAKMEM_TINY_ULTRA_HEAP_DUMP
- HAKMEM_TINY_ULTRA_PAGE_DUMP
- HAKMEM_TINY_ULTRA_FRONT
- HAKMEM_TINY_BG_REMOTE (no getenv anywhere, so never enabled; see the sketch after this list)
- HAKMEM_TINY_BG_REMOTE_BATCH (no getenv, dead code)
- HAKMEM_TINY_ULTRA_SIMPLE (references only)
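
For context, a minimal sketch of the lazy getenv gate used by live ENV flags
elsewhere in this codebase (e.g. HAKMEM_TINY_ALLOC_1024_METRIC in
hakmem_tiny_alloc.inc). No such gate ever read HAKMEM_TINY_BG_REMOTE, so
g_bg_remote_enable kept its zero initializer and the guarded path could never run:

    static int g_bg_remote_enable = 0;  /* stayed 0: no getenv() ever set it */
    /* The gate that would have been required to enable it: */
    const char* e = getenv("HAKMEM_TINY_BG_REMOTE");
    g_bg_remote_enable = (e && *e && *e != '0') ? 1 : 0;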

Impact:
- Code reduction: -1,096 lines
- Binary size: 305KB → 304KB (-1KB)
- Build: PASS
- Sanity: 15.69M ops/s (3 runs avg)
- Larson: 1 crash observed (seed 43, likely existing instability)

Notes:
- Ultra HEAP never compiled (#if HAKMEM_TINY_ULTRA_HEAP with the flag undefined; see the sketch below)
- BG Remote variables never initialized (g_bg_remote_enable always 0)
- Ultra SLIM (ultra_slim_alloc_box.h) preserved (active 4-layer path)
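
A minimal sketch of why the undefined build flag made Ultra HEAP unreachable
(standard C preprocessor behavior; the guarded declaration is illustrative):

    /* Undefined identifiers evaluate to 0 inside #if expressions (C11 6.10.1),
       so without -DHAKMEM_TINY_ULTRA_HEAP=1 this block was never compiled. */
    #if HAKMEM_TINY_ULTRA_HEAP
    void* ultra_heap_alloc(size_t size);  /* hypothetical guarded code */
    #endif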

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 04:35:47 +09:00


// ============================================================================
// Box TLS-SLL API
// ============================================================================
#include "box/tls_sll_box.h"
#include "front/tiny_heap_v2.h"

// Optional: track alloc->class routing for sizes near 1KB (env: HAKMEM_TINY_ALLOC_1024_METRIC)
extern _Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES];

static inline void tiny_diag_track_size_ge1024(size_t req_size, int class_idx) {
    if (__builtin_expect(req_size < 1024, 1)) return;
    static int s_metric_en = -1;
    if (__builtin_expect(s_metric_en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_ALLOC_1024_METRIC");
        s_metric_en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!__builtin_expect(s_metric_en, 0)) return;
    if (__builtin_expect(class_idx >= 0 && class_idx < TINY_NUM_CLASSES, 1)) {
        atomic_fetch_add_explicit(&g_tiny_alloc_ge1024[class_idx], 1, memory_order_relaxed);
    } else {
        static _Atomic int g_metric_bad_class_once = 0;
        if (atomic_fetch_add_explicit(&g_metric_bad_class_once, 1, memory_order_relaxed) == 0) {
            fprintf(stderr, "[ALLOC_1024_METRIC] bad class_idx=%d size=%zu\n", class_idx, req_size);
        }
    }
}
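// A usage sketch (hypothetical driver code, not part of this file): with
// HAKMEM_TINY_ALLOC_1024_METRIC=1 in the environment, the per-class counters
// can be dumped after a run, e.g.:
//   for (int c = 0; c < TINY_NUM_CLASSES; c++)
//       fprintf(stderr, "class %d: %llu\n", c, (unsigned long long)
//               atomic_load_explicit(&g_tiny_alloc_ge1024[c], memory_order_relaxed));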
// ============================================================================
// Step 3: Cold-path outline - Wrapper Context Handler
// ============================================================================
// Purpose: Handle allocations during wrapper calls (rare execution)
// Rationale: Avoid re-entrancy hazards with pthread locks during wrapper calls
// Step 3d: Force inline for readability without performance loss
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx) {
    ROUTE_BEGIN(class_idx);
    // Wrapper-context fast path: magazine-only (never take locks or refill)
    tiny_small_mags_init_once();
    if (__builtin_expect(class_idx > 3, 0)) tiny_mag_init_if_needed(class_idx);
    TinyTLSMag* mag = &g_tls_mags[class_idx];
    if (mag->top > 0) {
        void* p = mag->items[--mag->top].ptr;
        HAK_RET_ALLOC(class_idx, p);
    }
    // Try TLS active slabs (owner-only, lock-free)
    TinySlab* tls = g_tls_active_slab_a[class_idx];
    if (!(tls && tls->free_count > 0)) tls = g_tls_active_slab_b[class_idx];
    if (tls && tls->free_count > 0) {
        tiny_remote_drain_owner(tls);
        if (tls->free_count > 0) {
            int block_idx = hak_tiny_find_free_block(tls);
            if (block_idx >= 0) {
                hak_tiny_set_used(tls, block_idx);
                tls->free_count--;
                size_t bs = g_tiny_class_sizes[class_idx];
                void* p = (char*)tls->base + (block_idx * bs);
                HAK_RET_ALLOC(class_idx, p);
            }
        }
    }
    // Optional: attempt limited refill under trylock (no remote drain)
    if (g_wrap_tiny_refill) {
        pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
        if (pthread_mutex_trylock(lock) == 0) {
            TinySlab* slab = g_tiny_pool.free_slabs[class_idx];
            if (slab && slab->free_count > 0) {
                int room = mag->cap - mag->top;
                if (room > 16) room = 16;  // wrapper refill is small and quick
                if (room > slab->free_count) room = slab->free_count;
                if (room > 0) {
                    size_t bs = g_tiny_class_sizes[class_idx];
                    void* ret = NULL;
                    for (int i = 0; i < room; i++) {
                        int idx = hak_tiny_find_free_block(slab);
                        if (idx < 0) break;
                        hak_tiny_set_used(slab, idx);
                        slab->free_count--;
                        void* p = (char*)slab->base + (idx * bs);
                        if (i < room - 1) {
                            mag->items[mag->top].ptr = p;
                            mag->top++;
                        } else {
                            ret = p;  // return one directly
                        }
                    }
                    if (slab->free_count == 0) {
                        move_to_full_list(class_idx, slab);
                    }
                    pthread_mutex_unlock(lock);
                    if (ret) { HAK_RET_ALLOC(class_idx, ret); }
                } else {
                    pthread_mutex_unlock(lock);
                }
            } else {
                pthread_mutex_unlock(lock);
            }
        }
    }
    return NULL;  // empty → fallback to next allocator tier
}
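// Front-end tier order in the function below (summarized from its own code):
// HeapV2 front → minimal SLL front → bench fastpath (build-gated) →
// TinyHotMag → hot-class pop → generic fast pop → hak_tiny_alloc_slow().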
void* hak_tiny_alloc(size_t size) {
#if !HAKMEM_BUILD_RELEASE
    if (!g_tiny_initialized) hak_tiny_init();
#else
    if (__builtin_expect(!g_tiny_initialized, 0)) {
        hak_tiny_init();
    }
#endif
    // Default (safe): avoid using Tiny during wrapper calls (detected via the
    // TLS guard or the hak_in_wrapper() function below).
    // If HAKMEM_WRAP_TINY=1, allow Tiny even when called from a wrapper.
#if !HAKMEM_BUILD_RELEASE
# if HAKMEM_WRAPPER_TLS_GUARD
    if (!g_wrap_tiny_enabled && __builtin_expect(g_tls_in_wrapper != 0, 0)) {
        static int log1 = 0;
        if (log1 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: in_wrapper\n"); log1++; }
        return NULL;
    }
# else
    extern int hak_in_wrapper(void);
    if (!g_wrap_tiny_enabled && __builtin_expect(hak_in_wrapper() != 0, 0)) {
        static int log2 = 0;
        if (log2 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: hak_in_wrapper\n"); log2++; }
        return NULL;
    }
# endif
#endif
    // ========================================================================
    // Cooperative stats polling (SIGUSR1 trigger safe point)
    hak_tiny_stats_poll();
    // ========================================================================
    // Phase 6-1.5: Ultra-Simple Fast Path (when enabled)
    // ========================================================================
    // Design: "Simple Front + Smart Back" - inspired by Mid-Large HAKX +171%
    // - 3-4 instruction fast path (Phase 6-1 style)
    // - Existing SuperSlab + ACE + Learning backend
    // Two variants:
    //   Phase 6-1.5: -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1 (alignment guessing)
    //   Phase 6-1.6: -DHAKMEM_TINY_PHASE6_METADATA=1 (metadata header)
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
    return hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
    return hak_tiny_alloc_metadata(size);
#endif
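    // Illustrative only (an assumed sketch, not the actual body of
    // hak_tiny_alloc_ultra_simple): a "3-4 instruction" front reduces to a
    // TLS free-list pop with a single branch:
    //   void* p = tls_head[cls];          // 1: load TLS list head
    //   if (p) {                          // 2: test + branch
    //       tls_head[cls] = *(void**)p;   // 3: relink to next node
    //       return p;                     // 4: return
    //   }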
    // ========================================================================
    // 1. Size → class index
    int class_idx = hak_tiny_size_to_class(size);
    if (class_idx < 0) {
        static int log3 = 0;
        if (log3 < 2) { fprintf(stderr, "[DEBUG] Tiny blocked: class_idx < 0 for size %zu\n", size); log3++; }
        return NULL;  // >1KB
    }

#define HAK_RET_ALLOC_WITH_METRIC(ptr) do { \
        tiny_diag_track_size_ge1024(size, class_idx); \
        HAK_RET_ALLOC(class_idx, (ptr)); \
    } while(0)

    // Route fingerprint begin (debug-only; no-op unless HAKMEM_ROUTE=1)
    ROUTE_BEGIN(class_idx);
    do {
        static int g_alloc_ring = -1;
        if (__builtin_expect(g_alloc_ring == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_ALLOC_RING");
            g_alloc_ring = (e && *e && *e != '0') ? 1 : 0;
        }
        if (g_alloc_ring) {
            tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, (uint16_t)class_idx, (void*)(uintptr_t)size, 0);
        }
    } while (0);
    // Phase 13-A/B: Tiny Heap v2 front (tcache-like, A/B)
    if (__builtin_expect(tiny_heap_v2_enabled() && front_prune_heapv2_enabled() && class_idx <= 3, 0)) {
        void* base = tiny_heap_v2_alloc_by_class(class_idx);
        if (base) {
            front_metrics_heapv2_hit(class_idx);
            HAK_RET_ALLOC_WITH_METRIC(base);  // Header write + return USER pointer
        } else {
            front_metrics_heapv2_miss(class_idx);
        }
        // Fall through to existing front path if HeapV2 misses
    }
#if HAKMEM_TINY_MINIMAL_FRONT
    // Minimal Front for hot tiny classes (bench-focused):
    // SLL direct pop → minimal refill → pop, bypassing other layers.
    if (__builtin_expect(class_idx <= 3, 1)) {
        void* head = NULL;
        if (tls_sll_pop(class_idx, &head)) {
            HAK_RET_ALLOC_WITH_METRIC(head);
        }
        // Refill a small batch directly from TLS-cached SuperSlab
#if HAKMEM_TINY_P0_BATCH_REFILL
        (void)sll_refill_batch_from_ss(class_idx, 32);
#else
        (void)sll_refill_small_from_ss(class_idx, 32);
#endif
        if (tls_sll_pop(class_idx, &head)) {
            HAK_RET_ALLOC_WITH_METRIC(head);
        }
        // Fall through to slow path if still empty
    }
#endif
    // Ultra-Front - REMOVED (dead code cleanup 2025-11-27)
    if (__builtin_expect(!g_debug_fast0, 1)) {
#ifdef HAKMEM_TINY_BENCH_FASTPATH
        if (__builtin_expect(class_idx <= HAKMEM_TINY_BENCH_TINY_CLASSES, 1)) {
            if (__builtin_expect(class_idx <= 3, 1)) {
                unsigned char* done = &g_tls_bench_warm_done[class_idx];
                if (__builtin_expect(*done == 0, 0)) {
                    int warm = (class_idx == 0) ? HAKMEM_TINY_BENCH_WARMUP8 :
                               (class_idx == 1) ? HAKMEM_TINY_BENCH_WARMUP16 :
                               (class_idx == 2) ? HAKMEM_TINY_BENCH_WARMUP32 :
                                                  HAKMEM_TINY_BENCH_WARMUP64;
#if HAKMEM_TINY_P0_BATCH_REFILL
                    if (warm > 0) (void)sll_refill_batch_from_ss(class_idx, warm);
#else
                    if (warm > 0) (void)sll_refill_small_from_ss(class_idx, warm);
#endif
                    *done = 1;
                }
            }
#ifndef HAKMEM_TINY_BENCH_SLL_ONLY
            tiny_small_mags_init_once();
            if (class_idx > 3) tiny_mag_init_if_needed(class_idx);
#endif
            void* head = NULL;
            if (tls_sll_pop(class_idx, &head)) {
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, head, 0);
                HAK_RET_ALLOC_WITH_METRIC(head);
            }
#ifndef HAKMEM_TINY_BENCH_SLL_ONLY
            TinyTLSMag* mag = &g_tls_mags[class_idx];
            int t = mag->top;
            if (__builtin_expect(t > 0, 1)) {
                void* p = mag->items[--t].ptr;
                mag->top = t;
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, p, 1);
                HAK_RET_ALLOC_WITH_METRIC(p);
            }
#endif
            int bench_refill = (class_idx == 0) ? HAKMEM_TINY_BENCH_REFILL8 :
                               (class_idx == 1) ? HAKMEM_TINY_BENCH_REFILL16 :
                               (class_idx == 2) ? HAKMEM_TINY_BENCH_REFILL32 :
                                                  HAKMEM_TINY_BENCH_REFILL64;
#if HAKMEM_TINY_P0_BATCH_REFILL
            if (__builtin_expect(sll_refill_batch_from_ss(class_idx, bench_refill) > 0, 0)) {
#else
            if (__builtin_expect(sll_refill_small_from_ss(class_idx, bench_refill) > 0, 0)) {
#endif
                if (tls_sll_pop(class_idx, &head)) {
                    tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, head, 2);
                    HAK_RET_ALLOC_WITH_METRIC(head);
                }
            }
            // fallthrough to slow path on miss
        }
#endif
        // TinyHotMag front: when the fast tier is exhausted, refill the cache
        // before using it.
        if (__builtin_expect(g_hotmag_enable && class_idx <= 2 && g_fast_head[class_idx] == NULL, 0)) {
            hotmag_init_if_needed(class_idx);
            TinyHotMag* hm = &g_tls_hot_mag[class_idx];
            void* hotmag_ptr = hotmag_pop(class_idx);
            if (__builtin_expect(hotmag_ptr == NULL, 0)) {
                if (hotmag_try_refill(class_idx, hm) > 0) {
                    hotmag_ptr = hotmag_pop(class_idx);
                }
            }
            if (__builtin_expect(hotmag_ptr != NULL, 1)) {
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, hotmag_ptr, 3);
                HAK_RET_ALLOC_WITH_METRIC(hotmag_ptr);
            }
        }
        if (g_hot_alloc_fn[class_idx] != NULL) {
            void* fast_hot = NULL;
            switch (class_idx) {
            case 0:  fast_hot = tiny_hot_pop_class0(); break;
            case 1:  fast_hot = tiny_hot_pop_class1(); break;
            case 2:  fast_hot = tiny_hot_pop_class2(); break;
            case 3:  fast_hot = tiny_hot_pop_class3(); break;
            default: fast_hot = NULL; break;
            }
            if (__builtin_expect(fast_hot != NULL, 1)) {
#if HAKMEM_BUILD_DEBUG
                g_tls_hit_count[class_idx]++;
#endif
                tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, fast_hot, 4);
                HAK_RET_ALLOC_WITH_METRIC(fast_hot);
            }
        }
        void* fast = tiny_fast_pop(class_idx);
        if (__builtin_expect(fast != NULL, 0)) {
#if HAKMEM_BUILD_DEBUG
            g_tls_hit_count[class_idx]++;
#endif
            tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, fast, 5);
            HAK_RET_ALLOC_WITH_METRIC(fast);
        }
    } else {
        tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, NULL, 0);
    }
    void* slow_ptr = hak_tiny_alloc_slow(size, class_idx);
    if (slow_ptr) {
        tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_SUCCESS, (uint16_t)class_idx, slow_ptr, 6);
        HAK_RET_ALLOC_WITH_METRIC(slow_ptr);  // Increment stats for slow path success
    }
    tiny_alloc_dump_tls_state(class_idx, "fail", &g_tls_slabs[class_idx]);
    tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_NULL, (uint16_t)class_idx, NULL, 0);
    return slow_ptr;
}
#undef HAK_RET_ALLOC_WITH_METRIC