Tiny: Enable P0 batch refill by default + docs and task update
Summary
- Default P0 ON: the build-time flag HAKMEM_TINY_P0_BATCH_REFILL=1 remains; the runtime gate now defaults to ON (HAKMEM_TINY_P0_ENABLE unset or not '0'). The kill switch is preserved via HAKMEM_TINY_P0_DISABLE=1.
- Fix critical bug: after the freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist' to mirror non-P0 behavior (prevents under-accounting of used and keeps the follow-on carve invariants from breaking).
- Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain) and HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on the total_active_blocks delta).
- Keep linear carve fail-fast guards across the simple/general/TLS-bump paths.

Perf (1T, 100k×256B)
- P0 OFF: ~2.73M ops/s (stable)
- P0 ON (no drain): ~2.45M ops/s
- P0 ON (normal drain): ~2.76M ops/s (fastest)

Known issues
- Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing the active/used balance around the batch freelist splice and the remote drain splice.

Docs
- Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes).
- Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.
This commit is contained in:
@ -57,22 +57,12 @@ static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
|
||||
void** sll_head, uint32_t* sll_count) {
|
||||
if (!c || c->head == NULL) return;
|
||||
|
||||
// CORRUPTION DEBUG: Validate chain before splicing
|
||||
// CORRUPTION DEBUG: Log chain splice (alignment check removed - false positive)
|
||||
// NOTE: Blocks are stride-aligned from slab base, not absolutely aligned
|
||||
// A slab at 0x1000 with 513B blocks is valid: 0x1000, 0x1201, 0x1402, etc.
|
||||
if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
|
||||
extern const size_t g_tiny_class_sizes[];
|
||||
// Validate alignment using effective stride (include header for classes 0..6)
|
||||
size_t blk = g_tiny_class_sizes[class_idx] + ((class_idx != 7) ? 1 : 0);
|
||||
|
||||
fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n",
|
||||
class_idx, c->head, c->tail, c->count);
|
||||
|
||||
// Check alignment of chain head
|
||||
if (((uintptr_t)c->head % blk) != 0) {
|
||||
fprintf(stderr, "[SPLICE_CORRUPT] Chain head %p misaligned (blk=%zu offset=%zu)!\n",
|
||||
c->head, blk, (uintptr_t)c->head % blk);
|
||||
fprintf(stderr, "[SPLICE_CORRUPT] Corruption detected BEFORE writing to TLS!\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (c->tail) {
|
||||
@ -83,18 +73,23 @@ static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
|
||||
}
|
||||
|
||||
static inline int trc_refill_guard_enabled(void) {
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
return 0; // Always disabled in release builds
|
||||
#else
|
||||
// FIX: Allow runtime override even in release builds for debugging
|
||||
static int g_trc_guard = -1;
|
||||
if (__builtin_expect(g_trc_guard == -1, 0)) {
|
||||
const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
|
||||
#if HAKMEM_BUILD_RELEASE
|
||||
// Release: Default OFF, but allow explicit enable
|
||||
g_trc_guard = (env && *env && *env != '0') ? 1 : 0;
|
||||
#else
|
||||
// Debug: Default ON, but allow explicit disable
|
||||
g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1;
|
||||
fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)");
|
||||
#endif
|
||||
fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s mode=%s\n",
|
||||
g_trc_guard, env ? env : "(null)",
|
||||
HAKMEM_BUILD_RELEASE ? "release" : "debug");
|
||||
fflush(stderr);
|
||||
}
|
||||
return g_trc_guard;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
|
||||
@ -188,12 +183,8 @@ static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
|
||||
}
|
||||
|
||||
// FIX: Use carved counter (monotonic) instead of used (which decrements on free)
|
||||
// Effective stride: account for Tiny header when enabled (classes 0..6)
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
size_t stride = (bs == 1024 ? bs : (bs + 1));
|
||||
#else
|
||||
// Caller passes bs as the effective stride already (includes header when enabled)
|
||||
size_t stride = bs;
|
||||
#endif
|
||||
uint8_t* cursor = base + ((size_t)meta->carved * stride);
|
||||
void* head = (void*)cursor;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user