Files
hakmem/core/tiny_refill_opt.h
Moe Charm (CI) 1010a961fb Tiny: fix header/stride mismatch and harden refill paths
- Root cause: header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte
  header during allocation, but linear carve/refill and initial slab capacity
  still used bare class block sizes. This mismatch could overrun slab usable
  space and corrupt freelists, causing reproducible SEGV at ~100k iters.
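  For intuition, a minimal sketch of the mismatch (the layout, header value, and
  function names below are illustrative assumptions, not the allocator's real code):
  with the 1-byte header the true per-block footprint is class size + 1, so a carve
  that advances by the bare class size makes adjacent blocks overlap and pushes the
  last block past the slab's usable bytes.

  #include <stddef.h>
  #include <stdint.h>

  enum { TINY_HEADER_BYTES = 1 };  /* class-index byte written when HEADER_CLASSIDX=1 */

  /* Pre-fix layout: carve advanced by the bare class size, ignoring the header. */
  static void carve_buggy(uint8_t* slab, size_t class_size, size_t count) {
      for (size_t i = 0; i < count; i++) {
          uint8_t* block = slab + i * class_size;   /* stride is 1 byte too short */
          block[0] = (uint8_t)0x05;                 /* header still written at block start */
          /* payload at block + 1 overlaps the next block's first byte (where the
             freelist next pointer lives), and the last block spills past usable space */
      }
  }

  /* Post-fix layout: every computation uses the same header-aware stride. */
  static void carve_fixed(uint8_t* slab, size_t class_size, size_t count) {
      size_t stride = class_size + TINY_HEADER_BYTES;
      for (size_t i = 0; i < count; i++) {
          uint8_t* block = slab + i * stride;
          block[0] = (uint8_t)0x05;
      }
  }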

Changes
- Superslab: compute capacity with effective stride (block_size + header for
  classes 0..6; class7 remains headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if carve
  would exceed usable bytes.
- Refill (non-P0 and P0): use header-aware stride for all linear carving and
  TLS window bump operations. Ensure alignment/validation in tiny_refill_opt.h
  also uses stride, not raw class size (see the stride sketch after this list).
- Drain: keep existing defense-in-depth for remote sentinel and sanitize nodes
  before splicing into freelist (already present).
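
A compact sketch of the unified stride and the debug-only bound check (helper
names, the usable-byte parameter, and the macro below are hypothetical; only the
stride rule and the checks in superslab_init_slab()/superslab_alloc_from_slab()
come from this change):

  #include <stdint.h>
  #include <stdlib.h>

  /* Single stride rule shared by capacity math, linear carve, and refill. */
  static inline size_t tiny_effective_stride(int class_idx, size_t block_size) {
      /* classes 0..6 carry a 1-byte class-index header; class7 (1024B) stays headerless */
      return (class_idx == 7) ? block_size : block_size + 1;
  }

  /* superslab_init_slab(): capacity derived from the stride, not the bare block size. */
  static inline uint32_t tiny_slab_capacity(size_t usable_bytes, int class_idx,
                                            size_t block_size) {
      return (uint32_t)(usable_bytes / tiny_effective_stride(class_idx, block_size));
  }

  /* superslab_alloc_from_slab(), debug builds only: fail fast if a carve would
     exceed capacity (equivalently, run past the slab's usable bytes). */
  #if HAKMEM_BUILD_RELEASE
  #define TINY_CARVE_BOUND_CHECK(carved, batch, capacity) ((void)0)
  #else
  #define TINY_CARVE_BOUND_CHECK(carved, batch, capacity) \
      do { if ((carved) + (batch) > (capacity)) abort(); } while (0)
  #endif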

Notes
- This unifies the memory layout across alloc/linear-carve/refill with a single
  stride definition and keeps class7 (1024B) headerless as designed.
- Debug builds add fail-fast checks; release builds remain lean.

Next
- Re-run Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If a crash still reproduces, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0
  to isolate the P0 batch carve, and continue reducing branch misses as planned.
2025-11-09 18:55:50 +09:00


// tiny_refill_opt.h - Inline helpers to batch and splice refill chains
// Box: Refill Boundary optimization helpers (kept header-only)
#pragma once
#include <stdint.h>
#include <stdio.h>
#include <stdatomic.h>
#include <stdlib.h>
#ifndef HAKMEM_TINY_REFILL_OPT
#define HAKMEM_TINY_REFILL_OPT 1
#endif
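// Note: the helpers below dereference struct TinySlabMeta fields (freelist,
// carved, used, capacity), so the including translation unit must provide the
// TinySlabMeta definition before this header.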
// Local chain structure (head/tail pointers)
typedef struct TinyRefillChain {
    void* head;
    void* tail;
    uint32_t count;
} TinyRefillChain;
static inline void trc_init(TinyRefillChain* c) {
    c->head = NULL; c->tail = NULL; c->count = 0;
}
static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n) {
#if HAKMEM_TINY_REFILL_OPT
    static int en = -1;
    static _Atomic int printed = 0;
    if (__builtin_expect(en == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_REFILL_OPT_DEBUG");
        en = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!en) return;
    int exp = 0;
    if (atomic_compare_exchange_strong(&printed, &exp, 1)) {
        fprintf(stderr, "[REFILL_OPT] stage=%s cls=%d n=%u\n", stage ? stage : "(null)", class_idx, (unsigned)n);
        fflush(stderr);
    }
#else
    (void)stage; (void)class_idx; (void)n;
#endif
}
static inline void trc_push_front(TinyRefillChain* c, void* node) {
    if (c->head == NULL) {
        c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1;
    } else {
        *(void**)node = c->head; c->head = node; c->count++;
    }
}
// Forward declaration of guard function
static inline int trc_refill_guard_enabled(void);
// Splice local chain into TLS SLL (single meta write)
static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
                                     void** sll_head, uint32_t* sll_count) {
    if (!c || c->head == NULL) return;
    // CORRUPTION DEBUG: Validate chain before splicing
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        extern const size_t g_tiny_class_sizes[];
        // Validate alignment using effective stride (include header for classes 0..6)
        size_t blk = g_tiny_class_sizes[class_idx] + ((class_idx != 7) ? 1 : 0);
        fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n",
                class_idx, c->head, c->tail, c->count);
        // Check alignment of chain head
        if (((uintptr_t)c->head % blk) != 0) {
            fprintf(stderr, "[SPLICE_CORRUPT] Chain head %p misaligned (blk=%zu offset=%zu)!\n",
                    c->head, blk, (uintptr_t)c->head % blk);
            fprintf(stderr, "[SPLICE_CORRUPT] Corruption detected BEFORE writing to TLS!\n");
            abort();
        }
    }
    if (c->tail) {
        *(void**)c->tail = *sll_head;
    }
    *sll_head = c->head;
    if (sll_count) *sll_count += c->count;
}
static inline int trc_refill_guard_enabled(void) {
#if HAKMEM_BUILD_RELEASE
    return 0; // Always disabled in release builds
#else
    static int g_trc_guard = -1;
    if (__builtin_expect(g_trc_guard == -1, 0)) {
        const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
        g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1;
        fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)");
        fflush(stderr);
    }
    return g_trc_guard;
#endif
}
static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
    if (!node || limit <= base) return 1;
    uintptr_t addr = (uintptr_t)node;
    if (addr < base || addr >= limit) return 0;
    if (blk == 0) return 1;
    return ((addr - base) % blk) == 0;
}
static inline void trc_failfast_abort(const char* stage,
                                      int class_idx,
                                      uintptr_t base,
                                      uintptr_t limit,
                                      const void* node) {
    fprintf(stderr,
            "[TRC_FAILFAST] stage=%s cls=%d node=%p base=%p limit=%p\n",
            stage ? stage : "(null)",
            class_idx,
            node,
            (void*)base,
            (void*)limit);
    fflush(stderr);
    abort();
}
// Pop up to 'want' nodes from freelist into local chain
static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
                                             int class_idx,
                                             uintptr_t ss_base,
                                             uintptr_t ss_limit,
                                             size_t block_size,
                                             uint32_t want,
                                             TinyRefillChain* out) {
    if (!out || want == 0) return 0;
    trc_init(out);
    uint32_t taken = 0;
    while (taken < want && meta->freelist) {
        void* p = meta->freelist;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                             !trc_ptr_is_valid(ss_base, ss_limit, block_size, p),
                             0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist head: p=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Head pointer is corrupted (invalid range/alignment)\n");
            trc_failfast_abort("freelist_head", class_idx, ss_base, ss_limit, p);
        }
        void* next = *(void**)p;
        if (__builtin_expect(trc_refill_guard_enabled() &&
                             !trc_ptr_is_valid(ss_base, ss_limit, block_size, next),
                             0)) {
            fprintf(stderr, "[FREELIST_CORRUPT] Reading freelist node: p=%p next=%p (ss_base=%p ss_limit=%p blk=%zu)\n",
                    p, next, (void*)ss_base, (void*)ss_limit, block_size);
            fprintf(stderr, "[FREELIST_CORRUPT] Next pointer is corrupted (cls=%d taken=%u/%u)\n",
                    class_idx, taken, want);
            // Log offset details
            if (next != NULL) {
                uintptr_t offset = (uintptr_t)next - ss_base;
                size_t expected_align = offset % block_size;
                fprintf(stderr, "[FREELIST_CORRUPT] Corrupted offset=%zu (0x%zx) expected_align=%zu\n",
                        offset, offset, expected_align);
            }
            trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
        }
        meta->freelist = next;
        trc_push_front(out, p);
        taken++;
    }
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return taken;
}
// Carve a contiguous batch of size 'batch' from linear area, return as chain
static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                                        struct TinySlabMeta* meta,
                                        uint32_t batch,
                                        TinyRefillChain* out) {
    if (!out || batch == 0) return 0;
    trc_init(out);
    // CORRUPTION DEBUG: Validate capacity before carving
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        if (meta->carved + batch > meta->capacity) {
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] Carving beyond capacity!\n");
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] carved=%u batch=%u capacity=%u (would be %u)\n",
                    meta->carved, batch, meta->capacity, meta->carved + batch);
            fprintf(stderr, "[LINEAR_CARVE_CORRUPT] base=%p bs=%zu\n", (void*)base, bs);
            abort();
        }
    }
    // Effective stride: account for the Tiny header when enabled (classes 0..6);
    // class7 (1024B) stays headerless by design.
#if HAKMEM_TINY_HEADER_CLASSIDX
    size_t stride = (bs == 1024 ? bs : (bs + 1));
#else
    size_t stride = bs;
#endif
    // Use the carved counter (monotonic) as the carve cursor, not used
    // (which decrements on free).
    uint8_t* cursor = base + ((size_t)meta->carved * stride);
    void* head = (void*)cursor;
    // CORRUPTION DEBUG: Log carve operation
    if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
        fprintf(stderr, "[LINEAR_CARVE] base=%p carved=%u batch=%u cursor=%p\n",
                (void*)base, meta->carved, batch, (void*)cursor);
    }
    // Link the batch into a chain via the first word of each block
    for (uint32_t i = 1; i < batch; i++) {
        uint8_t* next = cursor + stride;
        *(void**)cursor = (void*)next;
        cursor = next;
    }
    void* tail = (void*)cursor;
    // Update both carved (monotonic high-water mark) and used (active count)
    meta->carved += batch;
    meta->used += batch;
    out->head = head;
    out->tail = tail;
    out->count = batch;
    // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
    return batch;
}
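
Usage sketch (illustration only, not part of the file): one way a refill path could
combine these helpers, assuming the caller already resolved the slab's meta, base
address, and range; the function and variable names here are hypothetical.

  static inline void tiny_refill_sketch(struct TinySlabMeta* meta, uint8_t* slab_base,
                                        uintptr_t ss_base, uintptr_t ss_limit,
                                        int class_idx, size_t block_size, uint32_t want,
                                        void** tls_sll_head, uint32_t* tls_sll_count) {
      TinyRefillChain chain;
      // 1) Reuse freed blocks first (range/alignment validated when the guard is on).
      uint32_t got = trc_pop_from_freelist(meta, class_idx, ss_base, ss_limit,
                                           block_size, want, &chain);
      trc_splice_to_sll(class_idx, &chain, tls_sll_head, tls_sll_count);
      // 2) Carve the remainder from the linear area as one contiguous, pre-linked batch.
      if (got < want && meta->carved < meta->capacity) {
          uint32_t room  = meta->capacity - meta->carved;
          uint32_t batch = (want - got < room) ? (want - got) : room;
          TinyRefillChain carved;
          trc_linear_carve(slab_base, block_size, meta, batch, &carved);
          trc_splice_to_sll(class_idx, &carved, tls_sll_head, tls_sll_count);
      }
  }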