Phase 1: Box Theory refactoring + include reduction
Phase 1-1: Split hakmem_tiny_free.inc (1,711 → 452 lines, -73%)
- Created tiny_free_magazine.inc.h (413 lines) - Magazine layer
- Created tiny_superslab_alloc.inc.h (394 lines) - SuperSlab alloc
- Created tiny_superslab_free.inc.h (305 lines) - SuperSlab free
Phase 1-2++: Refactor hakmem_pool.c (1,481 → 907 lines, -38.8%)
- Created pool_tls_types.inc.h (32 lines) - TLS structures
- Created pool_mf2_types.inc.h (266 lines) - MF2 data structures
- Created pool_mf2_helpers.inc.h (158 lines) - Helper functions
- Created pool_mf2_adoption.inc.h (129 lines) - Adoption logic
Phase 1-3: Reduce hakmem_tiny.c includes (60 → 46, -23.3%)
- Created tiny_system.h - System headers umbrella (stdio, stdlib, etc.)
- Created tiny_api.h - API headers umbrella (stats, query, rss, registry)
Performance: 4.19M ops/s maintained (±0% regression)
Verified: Larson benchmark 2×8×128×1024 = 4,192,128 ops/s
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-06 21:54:12 +09:00
// tiny_superslab_alloc.inc.h - SuperSlab Allocation Layer
// Purpose: Slab allocation, refill, and adoption logic
// Extracted from: hakmem_tiny_free.inc lines 626-1170
// Box Theory: Box 4 (Refill/Adoption) integration
//
// Public functions:
// - superslab_alloc_from_slab(): Allocate from a specific slab (linear or freelist)
// - superslab_refill(): Refill the TLS slab (adoption, registry scan, fresh alloc)
// - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point

// ============================================================================
// Phase 6.23: SuperSlab Allocation Helpers
// ============================================================================
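// Illustrative sketch (not compiled): how the three entry points relate.
// my_class_for_size() is a hypothetical helper; the real allocator resolves
// the size class before calling hak_tiny_alloc_superslab().
#if 0
void* tiny_alloc_example(size_t size) {
    int class_idx = my_class_for_size(size);       // hypothetical helper
    // Hot path: TLS-bound slab, linear bump or freelist pop.
    void* p = hak_tiny_alloc_superslab(class_idx);
    // On a miss, superslab_refill() adopts a partial SuperSlab or allocates a
    // fresh one, and the retry carves the first block out of the bound slab.
    return p;
}
#endif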
// Phase 6.24: Allocate from a SuperSlab slab (lazy freelist + linear allocation)
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
    TinySlabMeta* meta = &ss->slabs[slab_idx];

    // Ensure the remote queue is drained before handing blocks back to TLS
    if (atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0) {
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (slab_is_valid(&h)) {
            slab_drain_remote_full(&h);
            int pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire) != 0;
            if (__builtin_expect(pending, 0)) {
                if (__builtin_expect(g_debug_remote_guard, 0)) {
                    uintptr_t head = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_relaxed);
                    tiny_remote_watch_note("alloc_pending_remote", ss, slab_idx,
                                           (void*)head, 0xA243u, self_tid, 0);
                }
                slab_release(&h);
                return NULL;
            }
            slab_release(&h);
        } else {
            if (__builtin_expect(g_debug_remote_guard, 0)) {
                tiny_remote_watch_note("alloc_acquire_fail", ss, slab_idx,
                                       meta, 0xA244u, self_tid, 0);
            }
            return NULL;
        }
    }

    if (__builtin_expect(g_debug_remote_guard, 0)) {
        uintptr_t head_pending = atomic_load_explicit(&ss->remote_heads[slab_idx], memory_order_acquire);
        if (head_pending != 0) {
            tiny_remote_watch_note("alloc_remote_pending", ss, slab_idx,
                                   (void*)head_pending, 0xA247u, tiny_self_u32(), 1);
            return NULL;
        }
    }

    // Phase 6.24: Linear allocation mode (freelist == NULL).
    // This avoids the 4000-8000 cycle cost of building the freelist on init.
    if (meta->freelist == NULL && meta->used < meta->capacity) {
        // Linear allocation: sequential memory access (cache-friendly!)
        size_t block_size = g_tiny_class_sizes[ss->size_class];
        void* slab_start = slab_data_start(ss, slab_idx);

        // First slab: skip the SuperSlab header
        if (slab_idx == 0) {
            slab_start = (char*)slab_start + 1024;
        }

        void* block = (char*)slab_start + (meta->used * block_size);
        meta->used++;
        tiny_remote_track_on_alloc(ss, slab_idx, block, "linear_alloc", 0);
        tiny_remote_assert_not_remote(ss, slab_idx, block, "linear_alloc_ret", 0);
        return block; // Fast path: O(1) pointer arithmetic
    }

    // Freelist mode (after the first free())
    if (meta->freelist) {
        void* block = meta->freelist;
        meta->freelist = *(void**)block; // Pop from the freelist
        meta->used++;
        tiny_remote_track_on_alloc(ss, slab_idx, block, "freelist_alloc", 0);
        tiny_remote_assert_not_remote(ss, slab_idx, block, "freelist_alloc_ret", 0);
        return block;
    }

    return NULL; // Slab is full
}
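// Illustrative sketch (not compiled): the intrusive freelist popped above.
// Each free block stores the next-free pointer in its own first word, so push
// and pop are O(1) with no side metadata. Assumes every block is at least
// sizeof(void*) bytes, which should hold for the tiny classes here.
#if 0
static inline void freelist_push(void** head, void* block) {
    *(void**)block = *head;   // block's first word links to the old head
    *head = block;
}
static inline void* freelist_pop(void** head) {
    void* block = *head;
    if (block) *head = *(void**)block;  // advance head to the next free block
    return block;
}
#endif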
// Phase 6.24 & 7.6: Refill TLS SuperSlab (with unified TLS cache + deferred allocation)
static SuperSlab* superslab_refill(int class_idx) {
#if HAKMEM_DEBUG_COUNTERS
    g_superslab_refill_calls_dbg[class_idx]++;
#endif
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    static int g_ss_adopt_en = -1; // env: HAKMEM_TINY_SS_ADOPT=1; default: auto-on once remote frees are seen
    if (g_ss_adopt_en == -1) {
        char* e = getenv("HAKMEM_TINY_SS_ADOPT");
        if (e) {
            g_ss_adopt_en = (*e != '0') ? 1 : 0;
        } else {
            extern _Atomic int g_ss_remote_seen;
            g_ss_adopt_en = (atomic_load_explicit(&g_ss_remote_seen, memory_order_relaxed) != 0) ? 1 : 0;
        }
    }
    extern int g_adopt_cool_period;
    extern __thread int g_tls_adopt_cd[];
    if (g_adopt_cool_period == -1) {
        char* cd = getenv("HAKMEM_TINY_SS_ADOPT_COOLDOWN");
        int v = (cd ? atoi(cd) : 0);
        if (v < 0) v = 0;
        if (v > 1024) v = 1024;
        g_adopt_cool_period = v;
    }
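    // Usage note (illustrative): both knobs are parsed once per process.
    //   HAKMEM_TINY_SS_ADOPT=1              force adoption on (0 forces it off)
    //   HAKMEM_TINY_SS_ADOPT_COOLDOWN=64    skip adoption for 64 refills after a hit
    // e.g.  HAKMEM_TINY_SS_ADOPT=1 HAKMEM_TINY_SS_ADOPT_COOLDOWN=64 ./your_bench
    // ("./your_bench" is a placeholder binary; the variable names match the
    //  getenv() calls above.)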
    static int g_superslab_refill_debug_once = 0;
    SuperSlab* prev_ss = tls->ss;
    TinySlabMeta* prev_meta = tls->meta;
    uint8_t prev_slab_idx = tls->slab_idx;
    uint8_t prev_active = prev_ss ? prev_ss->active_slabs : 0;
    uint32_t prev_bitmap = prev_ss ? prev_ss->slab_bitmap : 0;
    uint32_t prev_meta_used = prev_meta ? prev_meta->used : 0;
    uint32_t prev_meta_cap = prev_meta ? prev_meta->capacity : 0;
    int free_idx_attempted = -2; // -2 = not evaluated, -1 = none, >=0 = chosen
    int reused_slabs = 0;

    // Optional: mid-size simple refill to avoid multi-layer scans (class >= 4)
    do {
        static int g_mid_simple_warn = 0;
        if (class_idx >= 4 && tiny_mid_refill_simple_enabled()) {
            // If the current TLS has a SuperSlab, prefer taking a virgin slab directly
            if (tls->ss) {
                int tls_cap = ss_slabs_capacity(tls->ss);
                if (tls->ss->active_slabs < tls_cap) {
                    int free_idx = superslab_find_free_slab(tls->ss);
                    if (free_idx >= 0) {
                        uint32_t my_tid = tiny_self_u32();
                        superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);
                        tiny_tls_bind_slab(tls, tls->ss, free_idx);
                        return tls->ss;
                    }
                }
            }
            // Otherwise allocate a fresh SuperSlab and bind its first slab
            SuperSlab* ssn = superslab_allocate((uint8_t)class_idx);
            if (!ssn) {
                if (!g_superslab_refill_debug_once && g_mid_simple_warn < 2) {
                    g_mid_simple_warn++;
                    int err = errno;
                    fprintf(stderr, "[DEBUG] mid_simple_refill OOM class=%d errno=%d\n", class_idx, err);
                }
                return NULL;
            }
            uint32_t my_tid = tiny_self_u32();
            superslab_init_slab(ssn, 0, g_tiny_class_sizes[class_idx], my_tid);
            SuperSlab* old = tls->ss;
            tiny_tls_bind_slab(tls, ssn, 0);
            superslab_ref_inc(ssn);
            if (old && old != ssn) { superslab_ref_dec(old); }
            return ssn;
        }
    } while (0);
    // First, try to adopt a published partial SuperSlab for this class
    if (g_ss_adopt_en) {
        if (g_adopt_cool_period > 0) {
            if (g_tls_adopt_cd[class_idx] > 0) {
                g_tls_adopt_cd[class_idx]--;
            } else {
                // eligible to adopt
            }
        }
        if (g_adopt_cool_period == 0 || g_tls_adopt_cd[class_idx] == 0) {
            SuperSlab* adopt = ss_partial_adopt(class_idx);
            if (adopt && adopt->magic == SUPERSLAB_MAGIC) {
                // ================================================================
                // Quick Win #2: First-fit adopt (vs. best-fit scoring of all 32 slabs).
                // For Larson-style workloads any slab with a freelist works, so there
                // is no need to score all 32. Expected improvement: ~3,000 cycles
                // (avoids 32 atomic loads + 32 score computations).
                // ================================================================
                int adopt_cap = ss_slabs_capacity(adopt);
                int best = -1;
                for (int s = 0; s < adopt_cap; s++) {
                    TinySlabMeta* m = &adopt->slabs[s];
                    // Quick check: does this slab have a freelist?
                    if (m->freelist) {
                        // Yes - take it immediately (first fit)
                        best = s;
                        break; // Optimization: stop at the first slab with a freelist
                    }
                    // Optional: also check remote_heads to prioritize those slabs
                    // (for Larson the freelist check is sufficient)
                }
                if (best >= 0) {
                    // Box: try to acquire ownership atomically
                    uint32_t self = tiny_self_u32();
                    SlabHandle h = slab_try_acquire(adopt, best, self);
                    if (slab_is_valid(&h)) {
                        slab_drain_remote_full(&h);
                        if (slab_remote_pending(&h)) {
                            if (__builtin_expect(g_debug_remote_guard, 0)) {
                                uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed);
                                tiny_remote_watch_note("adopt_remote_pending", h.ss, h.slab_idx,
                                                       (void*)head, 0xA255u, self, 0);
                            }
                            // Remote still pending; give up the adopt path and fall through to normal refill.
                            slab_release(&h);
                        }

                        // Box 4 boundary: bind must guarantee remote_head == 0.
                        // slab_is_safe_to_bind() performs a TOCTOU-safe check.
                        if (slab_is_safe_to_bind(&h)) {
                            // Optional: move a few nodes to the front SLL to boost upcoming hits
                            tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
                            // Safe to bind (freelist present && remote_head == 0 guaranteed)
                            tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
                            if (g_adopt_cool_period > 0) {
                                g_tls_adopt_cd[class_idx] = g_adopt_cool_period;
                            }
                            return h.ss;
                        }
                        // Safe-to-bind failed (no freelist, or remote pending) -> abort adoption
                        slab_release(&h);
                    }
                    // Failed to acquire or no freelist - continue searching
                }
                // If no freelist was found, ignore and continue (optional: republish)
            }
        }
    }
    // Phase 7.6 Step 4: check the existing SuperSlab in priority order
    if (tls->ss) {
        // Priority 1: reuse slabs with a freelist (already-freed blocks)
        int tls_cap = ss_slabs_capacity(tls->ss);
        uint32_t nonempty_mask = 0;
        do {
            static int g_mask_en = -1;
            if (__builtin_expect(g_mask_en == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
                g_mask_en = (e && *e && *e != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_mask_en, 0)) {
                nonempty_mask = atomic_load_explicit(&tls->ss->freelist_mask, memory_order_acquire);
                break;
            }
            for (int i = 0; i < tls_cap; i++) {
                if (tls->ss->slabs[i].freelist) nonempty_mask |= (1u << i);
            }
        } while (0);

        // O(1) lookup: scan the mask with ctz (one instruction per step)
        while (__builtin_expect(nonempty_mask != 0, 1)) {
            int i = __builtin_ctz(nonempty_mask); // find the first non-empty slab
            nonempty_mask &= ~(1u << i);          // clear the bit for the next iteration

            // FIX #1 DELETED (race-condition fix):
            // A previous drain without ownership caused concurrent freelist corruption.
            // Ownership protocol: MUST bind + owner CAS BEFORE draining (see Fix #3 in tiny_refill.h).
            // Remote frees are drained when the slab is adopted (see the tiny_refill.h paths).

            uint32_t self_tid = tiny_self_u32();
            SlabHandle h = slab_try_acquire(tls->ss, i, self_tid);
            if (slab_is_valid(&h)) {
                if (slab_remote_pending(&h)) {
                    slab_drain_remote_full(&h);
                    if (__builtin_expect(g_debug_remote_guard, 0)) {
                        uintptr_t head = atomic_load_explicit(&h.ss->remote_heads[h.slab_idx], memory_order_relaxed);
                        tiny_remote_watch_note("reuse_remote_pending", h.ss, h.slab_idx,
                                               (void*)head, 0xA254u, self_tid, 0);
                    }
                    slab_release(&h);
                    continue;
                }
                // Box 4 boundary: bind must guarantee remote_head == 0
                if (slab_is_safe_to_bind(&h)) {
                    // Optional: move a few nodes to the front SLL to boost upcoming hits
                    tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
                    reused_slabs = 1;
                    tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
                    return h.ss;
                }
                // Safe-to-bind failed -> try the next slab
                slab_release(&h);
            }
        }

        // Priority 2: use unused (virgin) slabs
        if (tls->ss->active_slabs < tls_cap) {
            // Find the next free slab
            int free_idx = superslab_find_free_slab(tls->ss);
            free_idx_attempted = free_idx;
            if (free_idx >= 0) {
                // Initialize this slab
                uint32_t my_tid = tiny_self_u32();
                superslab_init_slab(tls->ss, free_idx, g_tiny_class_sizes[class_idx], my_tid);

                // Update the TLS cache (unified update)
                tiny_tls_bind_slab(tls, tls->ss, free_idx);

                return tls->ss;
            }
        }
    }
    // Try to adopt a partial SuperSlab from the registry (one-shot, cheap scan).
    // This reduces the pressure to allocate a new SS when other threads freed blocks.
    // Phase 6: Registry Optimization - use the per-class registry for an O(class_size) scan
    if (!tls->ss) {
        // Phase 6: per-class registry (262K entries -> ~10-100 entries per class)
        extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
        extern int g_super_reg_class_size[TINY_NUM_CLASSES];

        const int scan_max = tiny_reg_scan_max();
        int reg_size = g_super_reg_class_size[class_idx];
        int scan_limit = (scan_max < reg_size) ? scan_max : reg_size;

        for (int i = 0; i < scan_limit; i++) {
            SuperSlab* ss = g_super_reg_by_class[class_idx][i];
            if (!ss || ss->magic != SUPERSLAB_MAGIC) continue;
            // Note: no class_idx check needed (the registry is per-class)

            // Pick the first slab with a freelist (Box 4: acquire ownership + remote check)
            int reg_cap = ss_slabs_capacity(ss);
            uint32_t self_tid = tiny_self_u32();
            for (int s = 0; s < reg_cap; s++) {
                if (ss->slabs[s].freelist) {
                    SlabHandle h = slab_try_acquire(ss, s, self_tid);
                    if (slab_is_valid(&h)) {
                        slab_drain_remote_full(&h);
                        if (slab_is_safe_to_bind(&h)) {
                            tiny_drain_freelist_to_sll_once(h.ss, h.slab_idx, class_idx);
                            tiny_tls_bind_slab(tls, ss, s);
                            return ss;
                        }
                        slab_release(&h);
                    }
                }
            }
        }
    }
    // Must-adopt-before-mmap gate: attempt sticky/hot/bench/mailbox/registry small-window
    {
        SuperSlab* gate_ss = tiny_must_adopt_gate(class_idx, tls);
        if (gate_ss) return gate_ss;
    }

    // Allocate a new SuperSlab
    SuperSlab* ss = superslab_allocate((uint8_t)class_idx);
    if (!ss) {
        if (!g_superslab_refill_debug_once) {
            g_superslab_refill_debug_once = 1;
            int err = errno;
            fprintf(stderr,
                    "[DEBUG] superslab_refill returned NULL (OOM) detail: class=%d prev_ss=%p active=%u bitmap=0x%08x prev_meta=%p used=%u cap=%u slab_idx=%u reused_freelist=%d free_idx=%d errno=%d\n",
                    class_idx,
                    (void*)prev_ss,
                    (unsigned)prev_active,
                    prev_bitmap,
                    (void*)prev_meta,
                    (unsigned)prev_meta_used,
                    (unsigned)prev_meta_cap,
                    (unsigned)prev_slab_idx,
                    reused_slabs,
                    free_idx_attempted,
                    err);
        }
        // Clear errno to avoid confusion in fallback paths
        errno = 0;
        return NULL; // OOM
    }

    // Initialize the first slab
    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss, 0, g_tiny_class_sizes[class_idx], my_tid);

    // Cache in the unified TLS (releasing the previous SS reference)
    SuperSlab* old = tls->ss;
    tiny_tls_bind_slab(tls, ss, 0);
    // Maintain the refcount (count the TLS reference, in preparation for future empty-SS reclamation)
    superslab_ref_inc(ss);
    if (old && old != ss) {
        superslab_ref_dec(old);
    }

    return ss;
}
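// Illustrative sketch (not compiled): the ctz-based mask scan used inside
// superslab_refill() above. Visiting only set bits makes the scan cost
// O(popcount) instead of O(capacity). Note __builtin_ctz is undefined for 0,
// so the loop must test the mask before each step.
#if 0
static void visit_nonempty_slabs(uint32_t mask) {
    while (mask != 0) {
        int i = __builtin_ctz(mask); // index of the lowest set bit
        mask &= mask - 1;            // clear that bit (same effect as ~(1u << i))
        /* ... inspect slab i here ... */
        (void)i;
    }
}
#endif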
// Phase 6.24: SuperSlab-based allocation (TLS unified, Medium fix)
static inline void* hak_tiny_alloc_superslab(int class_idx) {
    // DEBUG: function-entry trace (gated to avoid ring spam)
    do {
        static int g_alloc_ring = -1;
        if (__builtin_expect(g_alloc_ring == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_ALLOC_RING");
            g_alloc_ring = (e && *e && *e != '0') ? 1 : 0;
        }
        if (g_alloc_ring) {
            tiny_debug_ring_record(TINY_RING_EVENT_ALLOC_ENTER, 0x01, (void*)(uintptr_t)class_idx, 0);
        }
    } while (0);

    // MidTC fast path: for 128..1024B (class >= 4), try the TLS tcache first
    do {
        void* mp = midtc_pop(class_idx);
        if (mp) {
            HAK_RET_ALLOC(class_idx, mp);
        }
    } while (0);

    // Phase 6.24: 1 TLS read (down from 3)
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];

    TinySlabMeta* meta = tls->meta;
    int slab_idx = tls->slab_idx;
    if (meta && slab_idx >= 0 && tls->ss) {
        // A/B: relaxed read for the remote-head presence check
        static int g_alloc_remote_relax = -1; // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 -> relaxed
        if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_ALLOC_REMOTE_RELAX");
            g_alloc_remote_relax = (e && *e && *e != '0') ? 1 : 0;
        }
        uintptr_t pending = atomic_load_explicit(&tls->ss->remote_heads[slab_idx],
                                                 g_alloc_remote_relax ? memory_order_relaxed
                                                                      : memory_order_acquire);
        if (__builtin_expect(pending != 0, 0)) {
            uint32_t self_tid = tiny_self_u32();
            if (ss_owner_try_acquire(meta, self_tid)) {
                _ss_remote_drain_to_freelist_unsafe(tls->ss, slab_idx, meta);
            }
        }
    }

    // FIX #2 DELETED (race-condition fix):
    // A previous drain-all-slabs without ownership caused concurrent freelist corruption.
    // Problem: thread A owns slab 5, thread B drains all slabs including 5 -> both modify the freelist -> crash.
    // Ownership protocol: MUST bind + owner CAS BEFORE draining (see Fix #3 in tiny_refill.h).
    // Remote frees are drained when the slab is adopted via the refill paths.

    // Fast path: direct metadata access (no repeated TLS reads!)
    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        // Linear allocation (lazy init)
        size_t block_size = g_tiny_class_sizes[tls->ss->size_class];
        void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size));
        meta->used++;
        // Track active blocks in the SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        // Route: slab linear
        ROUTE_MARK(11); ROUTE_COMMIT(class_idx, 0x60);
        HAK_RET_ALLOC(class_idx, block); // Phase 8.4: zero hot-path overhead
    }

    if (meta && meta->freelist) {
        // Freelist allocation
        void* block = meta->freelist;
        // Safety: bounds/alignment check (debug)
        if (__builtin_expect(g_tiny_safe_free, 0)) {
            size_t blk = g_tiny_class_sizes[tls->ss->size_class];
            uint8_t* base = tiny_slab_base_for(tls->ss, tls->slab_idx);
            uintptr_t delta = (uintptr_t)block - (uintptr_t)base;
            int align_ok = ((delta % blk) == 0);
            int range_ok = (delta / blk) < meta->capacity;
            if (!align_ok || !range_ok) {
                uintptr_t info = ((uintptr_t)(align_ok ? 1u : 0u) << 32) | (uint32_t)(range_ok ? 1u : 0u);
                tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)tls->ss->size_class, block, info | 0xA100u);
                if (g_tiny_safe_free_strict) { raise(SIGUSR2); return NULL; }
                return NULL;
            }
        }
        void* next = *(void**)block;
        meta->freelist = next;
        meta->used++;
        // Optional: clear this slab's freelist bit when the freelist becomes empty
        do {
            static int g_mask_en = -1;
            if (__builtin_expect(g_mask_en == -1, 0)) {
                const char* e = getenv("HAKMEM_TINY_FREELIST_MASK");
                g_mask_en = (e && *e && *e != '0') ? 1 : 0;
            }
            if (__builtin_expect(g_mask_en, 0) && next == NULL) {
                uint32_t bit = (1u << slab_idx);
                atomic_fetch_and_explicit(&tls->ss->freelist_mask, ~bit, memory_order_release);
            }
        } while (0);
        // Track active blocks in the SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        // Route: slab freelist
        ROUTE_MARK(12); ROUTE_COMMIT(class_idx, 0x61);
        HAK_RET_ALLOC(class_idx, block); // Phase 8.4: zero hot-path overhead
    }

    // Slow path: refill the TLS slab
    SuperSlab* ss = superslab_refill(class_idx);
    if (!ss) {
        static int log_oom = 0;
        if (log_oom < 2) { fprintf(stderr, "[DEBUG] superslab_refill returned NULL (OOM)\n"); log_oom++; }
        return NULL; // OOM
    }

    // Retry allocation (metadata already cached by superslab_refill)
    meta = tls->meta;

    // DEBUG: per-condition retry logging removed here (was commented out; disabled for benchmarks)

    if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        size_t block_size = g_tiny_class_sizes[ss->size_class];
        void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size));
        meta->used++;
        // Track active blocks in the SuperSlab for conservative reclamation
        ss_active_inc(ss);
        HAK_RET_ALLOC(class_idx, block); // Phase 8.4: zero hot-path overhead
    }

    // DEBUG: retry-failure logging removed here (was commented out; disabled for benchmarks)
    return NULL;
}
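// Illustrative sketch (not compiled): the bounds/alignment predicate from the
// freelist path above, as a standalone function. The numbers in the example
// comment are hypothetical (a 64B class with capacity 512); types come from
// <stdint.h>, which this file is assumed to include via its umbrella headers.
#if 0
static int block_ptr_ok(const uint8_t* base, const void* p,
                        size_t block_size, uint32_t capacity) {
    uintptr_t delta = (uintptr_t)p - (uintptr_t)base;
    int align_ok = (delta % block_size) == 0;        // on a block boundary?
    int range_ok = (delta / block_size) < capacity;  // indexes a valid slot?
    return align_ok && range_ok;
}
// Example: base + 3*64 passes; base + 100 fails align_ok; base + 512*64 fails range_ok.
#endif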