// Files
// hakmem/core/tiny_refill_opt.h
//
// 152 lines
// 5.2 KiB
// C
// Raw Normal View History

// tiny_refill_opt.h - Inline helpers to batch and splice refill chains
// Box: Refill Boundary optimization helpers (kept header-only)
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <stdatomic.h>
#include <stdlib.h>
#ifndef HAKMEM_TINY_REFILL_OPT
#define HAKMEM_TINY_REFILL_OPT 1
#endif
// Local singly linked chain of blocks, tracked by its two ends.
// Nodes are linked through the first pointer-sized word of each block.
typedef struct TinyRefillChain {
    void    *head;   // first node of the chain (NULL when the chain is empty)
    void    *tail;   // last node of the chain
    uint32_t count;  // number of nodes currently in the chain
} TinyRefillChain;
// Reset a chain to the empty state: no head, no tail, zero nodes.
static inline void trc_init(TinyRefillChain* c) {
    c->count = 0;
    c->tail  = NULL;
    c->head  = NULL;
}
// One-shot debug trace for the refill helpers.
//
// When HAKMEM_TINY_REFILL_OPT is non-zero, the environment variable
// HAKMEM_TINY_REFILL_OPT_DEBUG is read on first use; a value that is
// non-empty and does not start with '0' enables tracing. Only the call
// that wins the compare-and-swap on the "already logged" flag prints a
// line to stderr; every later call is silent.
//
// When HAKMEM_TINY_REFILL_OPT is zero the function is a no-op.
static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) {
#if HAKMEM_TINY_REFILL_OPT
    static int dbg_enabled = -1;            // -1 = environment not consulted yet
    static _Atomic int already_logged = 0;  // flips to 1 when the single line is printed

    if (__builtin_expect(dbg_enabled == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
        if (env && *env && *env != '0') {
            dbg_enabled = 1;
        } else {
            dbg_enabled = 0;
        }
    }

    if (dbg_enabled) {
        int expected = 0;
        if (atomic_compare_exchange_strong(&already_logged, &expected, 1)) {
            fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n);
            fflush(stderr);
        }
    }
#else
    (void)stage;
    (void)class_idx;
    (void)n;
#endif
}
// Prepend 'node' to the chain.
// The node's first pointer-sized word is used as its "next" link. Pushing
// onto an empty chain makes the node both head and tail and sets its link
// to NULL.
static inline void trc_push_front(TinyRefillChain* c, void* node) {
    void** link = (void**)node;

    if (c->head != NULL) {
        // Non-empty chain: new node points at the old head and becomes the head.
        *link = c->head;
        c->head = node;
        c->count++;
        return;
    }

    // Empty chain: the node is the only element.
    c->head = node;
    c->tail = node;
    *link = NULL;
    c->count = 1;
}
// Splice a local chain onto the front of a singly linked list.
//
// The chain's tail is linked to the current list head, the list head is
// replaced by the chain's head, and (when sll_count is non-NULL) the list
// count grows by the chain's node count. A NULL or empty chain is a no-op.
// 'class_idx' is not used by this helper. The chain itself is left untouched.
static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
                                     void** sll_head, uint32_t* sll_count) {
    (void)class_idx;

    if (c == NULL || c->head == NULL) {
        return;
    }

    void* chain_tail = c->tail;
    if (chain_tail != NULL) {
        *(void**)chain_tail = *sll_head;
    }
    *sll_head = c->head;

    if (sll_count != NULL) {
        *sll_count += c->count;
    }
}
// Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP)
//
// ## Phase 6-2.3: Fix 4T Larson crash (active counter bug) ✅
// **Problem:** 4T Larson crashed with "free(): invalid pointer", OOM errors
// **Root cause:** core/hakmem_tiny_refill_p0.inc.h:103
// - P0 batch refill moved freelist blocks to TLS cache
// - Active counter NOT incremented → double-decrement on free
// - Counter underflows → SuperSlab appears full → OOM → crash
// **Fix:** Added ss_active_add(tls->ss, from_freelist);
// **Result:** 4T stable at 838K ops/s ✅
//
// ## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks ✅
// **Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV
// **Root cause #1:** core/box/hak_free_api.inc.h:92-95
// - "Guess loop" dereferenced unmapped memory when registry lookup failed
// **Root cause #2:** core/box/hak_free_api.inc.h:115
// - Header magic check dereferenced unmapped memory
// **Fix:**
// 1. Removed dangerous guess loop (lines 92-95)
// 2. Added hak_is_memory_readable() check before dereferencing header
//    (core/hakmem_internal.h:277-294 - uses mincore() syscall)
// **Result:**
// - random_mixed (2KB): SEGV → 2.22M ops/s ✅
// - random_mixed (4KB): SEGV → 2.58M ops/s ✅
// - Larson 4T: no regression (838K ops/s) ✅
//
// ## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️
// **Problem:** Severe performance gaps (19-26x slower than system malloc)
// **Investigation:** Task agent identified root cause
// - hak_is_memory_readable() syscall overhead (100-300 cycles per free)
// - ALL frees hit unmapped_header_fallback path
// - SuperSlab lookup NEVER called
// - Why? g_use_superslab = 0 (disabled by diet mode)
// **Root cause:** core/hakmem_tiny_init.inc:104-105
// - Diet mode (default ON) disables SuperSlab
// - SuperSlab defaults to 1 (hakmem_config.c:334)
// - BUT diet mode overrides it to 0 during init
// **Fix:** Separate SuperSlab from diet mode
// - SuperSlab: Performance-critical (fast alloc/free)
// - Diet mode: Memory efficiency (magazine capacity limits only)
// - Both are independent features, should not interfere
// **Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix
// - SuperSlab lookup now works (confirmed via debug output)
// - But benchmark crashes (Exit 139) after ~20 lookups
// - Needs further investigation
// **Files modified:**
// - core/hakmem_tiny_init.inc:99-109 - Removed diet mode override
// - PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap)
// **Next steps:**
// - Investigate new SEGV (likely SuperSlab free path bug)
// - OR: Revert Phase 6-2.5 changes if blocking progress
//
// 🤖 Generated with [Claude Code](https://claude.com/claude-code)
// Co-Authored-By: Claude <noreply@anthropic.com>
// 2025-11-07 20:31:01 +09:00
// Report whether the fail-fast refill guard is active.
//
// The answer is computed on the first call from the environment variable
// HAKMEM_TINY_REFILL_FAILFAST and cached in a function-local static:
//   - unset or empty      -> 1 (guard on by default)
//   - first char is '0'   -> 0
//   - anything else       -> 1
// The first call also prints the decision to stderr.
static inline int trc_refill_guard_enabled(void) {
    static int cached = -1;  // -1 = not decided yet

    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }

    const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
    int enabled = 1;
    if (env && *env) {
        enabled = (*env != '0') ? 1 : 0;
    }
    cached = enabled;

    fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", cached, env ? env : "(null)");
    fflush(stderr);
    return cached;
}
// Check that 'node' is a plausible block address inside [base, limit).
//
// Returns 1 (accepted) when:
//   - node is NULL, or the range is empty/inverted (limit <= base), i.e.
//     there is nothing to check against; or
//   - node lies in [base, limit) and, if blk is non-zero, its offset from
//     base is a multiple of blk.
// Returns 0 otherwise.
static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
    if (node == NULL) {
        return 1;
    }
    if (limit <= base) {
        return 1;
    }

    const uintptr_t addr = (uintptr_t)node;
    const int in_range = (addr >= base) && (addr < limit);
    if (!in_range) {
        return 0;
    }

    // A zero block size disables the alignment check.
    return (blk == 0) ? 1 : (((addr - base) % blk) == 0);
}
// Print a fail-fast diagnostic to stderr and terminate the process.
// The message carries the stage label, the size class index, the offending
// node pointer and the address range it was checked against. Never returns:
// it ends in abort().
static inline void trc_failfast_abort(const char* stage,
                                      int class_idx,
                                      uintptr_t base,
                                      uintptr_t limit,
                                      const void* node) {
    const char* label = stage ? stage : "(null)";
    void* range_lo = (void*)base;
    void* range_hi = (void*)limit;

    fprintf(stderr, "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n",
            label, class_idx, node, range_lo, range_hi);
    fflush(stderr);
    abort();
}
// Pop up to 'want' nodes from freelist into local chain
static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP) ## Phase 6-2.3: Fix 4T Larson crash (active counter bug) ✅ **Problem:** 4T Larson crashed with "free(): invalid pointer", OOM errors **Root cause:** core/hakmem_tiny_refill_p0.inc.h:103 - P0 batch refill moved freelist blocks to TLS cache - Active counter NOT incremented → double-decrement on free - Counter underflows → SuperSlab appears full → OOM → crash **Fix:** Added ss_active_add(tls->ss, from_freelist); **Result:** 4T stable at 838K ops/s ✅ ## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks ✅ **Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV **Root cause #1:** core/box/hak_free_api.inc.h:92-95 - "Guess loop" dereferenced unmapped memory when registry lookup failed **Root cause #2:** core/box/hak_free_api.inc.h:115 - Header magic check dereferenced unmapped memory **Fix:** 1. Removed dangerous guess loop (lines 92-95) 2. Added hak_is_memory_readable() check before dereferencing header (core/hakmem_internal.h:277-294 - uses mincore() syscall) **Result:** - random_mixed (2KB): SEGV → 2.22M ops/s ✅ - random_mixed (4KB): SEGV → 2.58M ops/s ✅ - Larson 4T: no regression (838K ops/s) ✅ ## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️ **Problem:** Severe performance gaps (19-26x slower than system malloc) **Investigation:** Task agent identified root cause - hak_is_memory_readable() syscall overhead (100-300 cycles per free) - ALL frees hit unmapped_header_fallback path - SuperSlab lookup NEVER called - Why? 
g_use_superslab = 0 (disabled by diet mode) **Root cause:** core/hakmem_tiny_init.inc:104-105 - Diet mode (default ON) disables SuperSlab - SuperSlab defaults to 1 (hakmem_config.c:334) - BUT diet mode overrides it to 0 during init **Fix:** Separate SuperSlab from diet mode - SuperSlab: Performance-critical (fast alloc/free) - Diet mode: Memory efficiency (magazine capacity limits only) - Both are independent features, should not interfere **Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix - SuperSlab lookup now works (confirmed via debug output) - But benchmark crashes (Exit 139) after ~20 lookups - Needs further investigation **Files modified:** - core/hakmem_tiny_init.inc:99-109 - Removed diet mode override - PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap) **Next steps:** - Investigate new SEGV (likely SuperSlab free path bug) - OR: Revert Phase 6-2.5 changes if blocking progress 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 20:31:01 +09:00
int class_idx,
uintptr_t ss_base,
uintptr_t ss_limit,
size_t block_size,
uint32_t want,
TinyRefillChain* out) {
if (!out || want == 0) return 0;
trc_init(out);
uint32_t taken = 0;
while (taken < want && meta->freelist) {
void* p = meta->freelist;
Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP) ## Phase 6-2.3: Fix 4T Larson crash (active counter bug) ✅ **Problem:** 4T Larson crashed with "free(): invalid pointer", OOM errors **Root cause:** core/hakmem_tiny_refill_p0.inc.h:103 - P0 batch refill moved freelist blocks to TLS cache - Active counter NOT incremented → double-decrement on free - Counter underflows → SuperSlab appears full → OOM → crash **Fix:** Added ss_active_add(tls->ss, from_freelist); **Result:** 4T stable at 838K ops/s ✅ ## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks ✅ **Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV **Root cause #1:** core/box/hak_free_api.inc.h:92-95 - "Guess loop" dereferenced unmapped memory when registry lookup failed **Root cause #2:** core/box/hak_free_api.inc.h:115 - Header magic check dereferenced unmapped memory **Fix:** 1. Removed dangerous guess loop (lines 92-95) 2. Added hak_is_memory_readable() check before dereferencing header (core/hakmem_internal.h:277-294 - uses mincore() syscall) **Result:** - random_mixed (2KB): SEGV → 2.22M ops/s ✅ - random_mixed (4KB): SEGV → 2.58M ops/s ✅ - Larson 4T: no regression (838K ops/s) ✅ ## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️ **Problem:** Severe performance gaps (19-26x slower than system malloc) **Investigation:** Task agent identified root cause - hak_is_memory_readable() syscall overhead (100-300 cycles per free) - ALL frees hit unmapped_header_fallback path - SuperSlab lookup NEVER called - Why? 
g_use_superslab = 0 (disabled by diet mode) **Root cause:** core/hakmem_tiny_init.inc:104-105 - Diet mode (default ON) disables SuperSlab - SuperSlab defaults to 1 (hakmem_config.c:334) - BUT diet mode overrides it to 0 during init **Fix:** Separate SuperSlab from diet mode - SuperSlab: Performance-critical (fast alloc/free) - Diet mode: Memory efficiency (magazine capacity limits only) - Both are independent features, should not interfere **Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix - SuperSlab lookup now works (confirmed via debug output) - But benchmark crashes (Exit 139) after ~20 lookups - Needs further investigation **Files modified:** - core/hakmem_tiny_init.inc:99-109 - Removed diet mode override - PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap) **Next steps:** - Investigate new SEGV (likely SuperSlab free path bug) - OR: Revert Phase 6-2.5 changes if blocking progress 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 20:31:01 +09:00
if (__builtin_expect(trc_refill_guard_enabled() &&
!trc_ptr_is_valid(ss_base, ss_limit, block_size, p),
0)) {
trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p);
}
void* next = *(void**)p;
if (__builtin_expect(trc_refill_guard_enabled() &&
!trc_ptr_is_valid(ss_base, ss_limit, block_size, next),
0)) {
trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
}
meta->freelist = next;
trc_push_front(out, p);
taken++;
}
// DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
return taken;
}
// Carve a contiguous batch of 'batch' blocks from the linear area and
// return them as a chain.
//
//   base   start of the slab's block area
//   bs     block size in bytes
//   meta   slab metadata; carving starts at block index meta->used, and
//          meta->used is advanced by 'batch'
//   batch  number of blocks to carve
//   out    receives the chain (head = first carved block, tail = last)
//
// Returns 'batch' on success. Returns 0 without touching 'out' or 'meta'
// when 'out', 'base' or 'meta' is NULL or 'batch' is 0.
//
// Each carved block's first pointer-sized word is linked to the block that
// follows it; the last block's link is set to NULL so the chain is
// NULL-terminated, like chains built with trc_push_front().
//
// NOTE(review): no check is made that meta->used + batch stays within the
// slab's capacity; this presumably is the caller's responsibility — confirm.
static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                                        struct TinySlabMeta* meta,
                                        uint32_t batch,
                                        TinyRefillChain* out) {
    if (!out || !base || !meta || batch == 0) return 0;

    uint8_t* cursor = base + ((size_t)meta->used * bs);
    void* head = (void*)cursor;

    // Link block i-1 -> block i for the remaining batch-1 blocks.
    for (uint32_t i = 1; i < batch; i++) {
        uint8_t* next = cursor + bs;
        *(void**)cursor = (void*)next;
        cursor = next;
    }

    // 'cursor' now addresses the last carved block: terminate the chain there.
    void* tail = (void*)cursor;
    *(void**)tail = NULL;

    meta->used += batch;
    out->head = head;
    out->tail = tail;
    out->count = batch;
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return batch;
}