Fix BASE/USER pointer double conversion bugs in alloc/free fast paths

Root Cause:
- TINY_ALLOC_FAST_POP_INLINE returned USER pointer (base+1), but all other
  frontend layers return BASE pointer → HAK_RET_ALLOC wrote header/region
  at wrong offset (off-by-one)
- tiny_free_fast_ss() performed the BASE conversion twice (ptr-1, then base-1 again)
  → corrupted the TLS SLL chain, causing a SEGV at iteration 66151 (see the layout sketch below)
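
For orientation, a minimal sketch of the layout this commit assumes (the TOY_* values
and helper names below are illustrative stand-ins, not the allocator's real definitions):

```c
#include <stdint.h>

/* Tiny block layout: [1-byte header][user payload ...]
 * BASE points at the header byte; USER = BASE + 1 is what the API hands out. */
#define TOY_HEADER_MAGIC  0xA0u   /* illustrative value */
#define TOY_CLASS_MASK    0x0Fu   /* illustrative value */

static inline void *toy_user_from_base(void *base) { return (uint8_t *)base + 1; }
static inline void *toy_base_from_user(void *user) { return (uint8_t *)user - 1; }

/* The bug, in terms of this layout:
 *  - POP returned USER, then HAK_RET_ALLOC added +1 again → header written at BASE+1
 *  - free subtracted 1 twice → SLL node linked at BASE-1, corrupting the chain */
```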

Fixes:
1. tiny_alloc_fast_inline.h (Line 62):
   - Change POP macro to return BASE pointer (not USER)
   - Update PUSH macro to convert USER→BASE and restore header at BASE
   - Unify all frontend layers to "BASE world"

2. tiny_free_fast.inc.h (Line 125, 228):
   - Remove double conversion in tiny_free_fast_ss()
   - Pass BASE pointer from caller (already converted via ptr-1)
   - Add comments to prevent future regressions (see the contract sketch below)
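
A runnable toy sketch of the intended contract after this change, assuming the 1-byte
header scheme above (the toy_* functions are hypothetical stand-ins for
TINY_ALLOC_FAST_POP_INLINE / HAK_RET_ALLOC / tiny_free_fast, not the real code):

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

enum { TOY_MAGIC = 0xA0, TOY_CLASS_MASK = 0x0F };   /* illustrative values */

/* Every frontend layer (fast-path POP included) now trades in BASE pointers. */
static void *toy_pop_base(size_t size) { return malloc(size + 1); }

/* The single BASE→USER conversion sits at the API boundary
 * (the role HAK_RET_ALLOC plays): write the header at BASE, return BASE+1. */
static void *toy_ret_alloc(void *base, int class_idx) {
    *(uint8_t *)base = (uint8_t)(TOY_MAGIC | (class_idx & TOY_CLASS_MASK));
    return (uint8_t *)base + 1;
}

/* The single USER→BASE conversion sits at the top of free
 * (the role of tiny_free_fast); callees receive BASE and must not subtract again. */
static void toy_free(void *user) {
    uint8_t *base = (uint8_t *)user - 1;            /* the only ptr-1 */
    assert((*base & ~TOY_CLASS_MASK) == TOY_MAGIC); /* header really is at BASE */
    free(base);                                     /* callee gets BASE as-is */
}

int main(void) {
    void *p = toy_ret_alloc(toy_pop_base(64), 3);
    toy_free(p);
    return 0;
}
```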

Impact:
- Before: Crash at iteration 66151 (stack corruption)
- After: 100K iterations pass (1.95M ops/s), 1M iterations pass (840K ops/s)

Verified: Random mixed benchmark (WS=256, seeds 42-44), all tests pass.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
Moe Charm (CI)
2025-11-13 07:43:30 +09:00
parent 72b38bc994
commit c28314fb96
2 changed files with 27 additions and 15 deletions

tiny_alloc_fast_inline.h

@@ -58,8 +58,8 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
if (g_tls_sll_count[(class_idx)] > 0) { \
g_tls_sll_count[(class_idx)]--; \
} \
/* Phase E1-CORRECT: All classes return user pointer (base+1) */ \
(ptr_out) = (void*)((uint8_t*)_head + 1); \
/* Phase 7: Fast path returns BASE pointer; HAK_RET_ALLOC does BASE→USER */ \
(ptr_out) = _head; \
} \
} else { \
(ptr_out) = NULL; \
@@ -88,9 +88,14 @@ extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
// byte 0 for HEADER_MAGIC. Without restoration, it finds 0x00 → uses wrong offset → SEGV.
// COST: 1 byte write (~1-2 cycles per free, negligible).
#define TINY_ALLOC_FAST_PUSH_INLINE(class_idx, ptr) do { \
*(uint8_t*)(ptr) = HEADER_MAGIC | ((class_idx) & HEADER_CLASS_MASK); \
tiny_next_write(class_idx, (ptr), g_tls_sll_head[(class_idx)]); \
g_tls_sll_head[(class_idx)] = (ptr); \
if (!(ptr)) break; \
/* Phase E1-CORRECT: API ptr is USER pointer (= base+1). Convert back to BASE. */ \
uint8_t* _base = (uint8_t*)(ptr) - 1; \
/* Restore header at BASE (not at user). */ \
*_base = HEADER_MAGIC | ((class_idx) & HEADER_CLASS_MASK); \
/* Link node using BASE as the canonical SLL node address. */ \
tiny_next_write((class_idx), _base, g_tls_sll_head[(class_idx)]); \
g_tls_sll_head[(class_idx)] = _base; \
g_tls_sll_count[(class_idx)]++; \
} while(0)
#else

tiny_free_fast.inc.h

@@ -91,7 +91,9 @@ static inline int tiny_free_is_same_thread_legacy(TinySlab* slab) {
// ; Delegate to remote path
//
// Expected: 2-3 instructions on same-thread path (1 cmp, 1 load, 1 store)
static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint32_t my_tid) {
//
// ⚠️ CRITICAL: ptr parameter must be BASE pointer (already converted from USER via ptr-1)
static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* base, uint32_t my_tid) {
// BUGFIX: Validate slab_idx before array access (prevents buffer overflow at ss->slabs[-1])
int cap = ss_slabs_capacity(ss);
if (__builtin_expect(slab_idx < 0 || slab_idx >= cap, 0)) {
@@ -105,8 +107,8 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
free_ss_debug_count++;
int is_same = tiny_free_is_same_thread_ss(ss, slab_idx, my_tid);
extern int g_sfc_enabled;
fprintf(stderr, "[FREE_SS] ptr=%p, cls=%d, same_thread=%d, sfc_enabled=%d\n",
ptr, ss->size_class, is_same, g_sfc_enabled);
fprintf(stderr, "[FREE_SS] base=%p, cls=%d, same_thread=%d, sfc_enabled=%d\n",
base, ss->size_class, is_same, g_sfc_enabled);
}
// Box 6 Boundary: Ownership check (TOCTOU-safe)
@@ -120,8 +122,7 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
// Fast path: Same-thread free (2-3 instructions)
int class_idx = ss->size_class;
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
// Phase E1-CORRECT: base pointer already converted by caller (no double conversion!)
#if HAKMEM_DEBUG_COUNTERS
// Track same-thread frees (compile-time gated)
@@ -154,7 +155,9 @@ static inline int tiny_free_fast_ss(SuperSlab* ss, int slab_idx, void* ptr, uint
// Free fast path for Legacy TinySlab-backed allocation
// Returns: 1 on success (same-thread), 0 on failure (cross-thread)
static inline int tiny_free_fast_legacy(TinySlab* slab, void* ptr) {
//
// ⚠️ CRITICAL: ptr parameter must be BASE pointer (already converted from USER via ptr-1)
static inline int tiny_free_fast_legacy(TinySlab* slab, void* base) {
// Box 6 Boundary: Ownership check
if (__builtin_expect(!tiny_free_is_same_thread_legacy(slab), 0)) {
return 0; // Cross-thread → caller should delegate to precise path
@@ -162,8 +165,7 @@ static inline int tiny_free_fast_legacy(TinySlab* slab, void* ptr) {
// Fast path: Same-thread free
int class_idx = slab->class_idx;
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
// Phase E1-CORRECT: base pointer already converted by caller (no double conversion!)
// Box 5-NEW/5-OLD integration: Push to TLS freelist (SFC or SLL)
extern int g_sfc_enabled;
@@ -223,7 +225,8 @@ static inline void tiny_free_fast(void* ptr) {
uint32_t self_tid = tiny_self_u32();
// Box 6 Boundary: Try same-thread fast path
if (tiny_free_fast_ss(ss, slab_idx, ptr, self_tid)) {
// CRITICAL: Pass BASE pointer (already converted above)
if (tiny_free_fast_ss(ss, slab_idx, base, self_tid)) {
return; // Success: same-thread, pushed to TLS
}
@@ -237,8 +240,12 @@ static inline void tiny_free_fast(void* ptr) {
// 2. Legacy TinySlab-backed pointer?
TinySlab* slab = hak_tiny_owner_slab(ptr);
if (__builtin_expect(slab != NULL, 0)) {
// Convert USER → BASE (for Legacy path)
void* base_legacy = (void*)((uint8_t*)ptr - 1);
// Box 6 Boundary: Try same-thread fast path
if (tiny_free_fast_legacy(slab, ptr)) {
// CRITICAL: Pass BASE pointer (already converted above)
if (tiny_free_fast_legacy(slab, base_legacy)) {
return; // Success: same-thread, pushed to TLS
}