Tiny: Enable P0 batch refill by default + docs and task update

Summary
- Default P0 ON: the build-time HAKMEM_TINY_P0_BATCH_REFILL=1 flag remains; the runtime gate now defaults
  to ON (HAKMEM_TINY_P0_ENABLE unset, or set to anything other than '0'). The HAKMEM_TINY_P0_DISABLE=1
  kill switch is preserved.
- Fix critical bug: after the freelist→SLL batch splice, increment TinySlabMeta::used by 'from_freelist'
  to mirror the non-P0 path (prevents under-accounting that later breaks the carve invariants); see the
  accounting sketch after this list.
- Add low-overhead A/B toggles for triage: HAKMEM_TINY_P0_NO_DRAIN (skip remote drain) and
  HAKMEM_TINY_P0_LOG (emit [P0_COUNTER_OK/MISMATCH] based on the total_active_blocks delta);
  these gates and the default-ON switch above are sketched after this list.
- Keep linear carve fail-fast guards across simple/general/TLS-bump paths.
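
Gate resolution sketch (illustrative): only the environment-variable and build-flag names below come from
this commit; the p0_env_truthy() helper and the call sites are hypothetical, not the actual hakmem code.

#include <stdlib.h>

// Treat unset/empty as the given default; any value other than '0' means "on".
static inline int p0_env_truthy(const char* name, int dflt) {
    const char* v = getenv(name);
    if (!v || !*v) return dflt;
    return (*v != '0') ? 1 : 0;
}

static inline int tiny_p0_enabled(void) {
#if HAKMEM_TINY_P0_BATCH_REFILL
    // Kill switch wins; otherwise the runtime gate defaults to ON.
    if (p0_env_truthy("HAKMEM_TINY_P0_DISABLE", 0)) return 0;
    return p0_env_truthy("HAKMEM_TINY_P0_ENABLE", 1);
#else
    return 0; // compiled out when the build-time flag is absent
#endif
}

// A/B triage toggles, both defaulting to OFF.
static inline int tiny_p0_no_drain(void) { return p0_env_truthy("HAKMEM_TINY_P0_NO_DRAIN", 0); }
static inline int tiny_p0_log(void)      { return p0_env_truthy("HAKMEM_TINY_P0_LOG", 0); }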
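
Accounting sketch for the splice fix: TinySlabMeta::used and 'from_freelist' are named in this commit;
the trimmed struct and the wrapper function are illustrative only.

#include <stdint.h>

typedef struct { uint32_t used; uint32_t carved; } TinySlabMetaSketch; // fields trimmed for the example

static inline void p0_after_freelist_splice(TinySlabMetaSketch* meta, uint32_t from_freelist) {
    // The non-P0 path bumps 'used' once per block popped from the slab freelist.
    // The batch splice must add the same amount in one step, otherwise the slab
    // looks emptier than it is and later carve/retire invariants break.
    meta->used += from_freelist;
}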

Perf (1T, 100k×256B)
- P0 OFF: ~2.73M ops/s (stable)
- P0 ON (no drain): ~2.45M ops/s
- P0 ON (normal drain): ~2.76M ops/s (fastest)

Known
- Rare [P0_COUNTER_MISMATCH] warnings persist (non-fatal). Continue auditing the active/used balance
  around the batch freelist splice and the remote-drain splice; a delta-check sketch follows below.
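
Delta-check sketch for the HAKMEM_TINY_P0_LOG audit: 'total_active_blocks' is named in this commit; the
function shape and counter plumbing are assumptions for illustration, not the real logging code.

#include <stdio.h>
#include <stdint.h>

static void p0_audit_refill(uint64_t active_before, uint64_t active_after, uint32_t delivered) {
    // A refill is expected to move total_active_blocks by exactly the number of
    // blocks it accounted for; any other delta is logged as a mismatch.
    uint64_t delta = active_after - active_before;
    if (delta == delivered) {
        fprintf(stderr, "[P0_COUNTER_OK] delta=%llu\n", (unsigned long long)delta);
    } else {
        fprintf(stderr, "[P0_COUNTER_MISMATCH] expected=%u got=%llu\n",
                delivered, (unsigned long long)delta);
    }
}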

Docs
- Add docs/TINY_P0_BATCH_REFILL.md (runtime switches, behavior, perf notes).
- Update CURRENT_TASK.md with Tiny P0 status (default ON) and next steps.
Author: Moe Charm (CI)
Date:   2025-11-09 22:12:34 +09:00
Parent: 1010a961fb
Commit: d9b334b968
24 changed files with 1240 additions and 69 deletions

@@ -57,22 +57,12 @@ static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
void** sll_head, uint32_t* sll_count) {
if (!c || c->head == NULL) return;
// CORRUPTION DEBUG: Validate chain before splicing
// CORRUPTION DEBUG: Log chain splice (alignment check removed - false positive)
// NOTE: Blocks are stride-aligned from slab base, not absolutely aligned
// A slab at 0x1000 with 513B blocks is valid: 0x1000, 0x1201, 0x1402, etc.
if (__builtin_expect(trc_refill_guard_enabled(), 0)) {
extern const size_t g_tiny_class_sizes[];
// Validate alignment using effective stride (include header for classes 0..6)
size_t blk = g_tiny_class_sizes[class_idx] + ((class_idx != 7) ? 1 : 0);
fprintf(stderr, "[SPLICE_TO_SLL] cls=%d head=%p tail=%p count=%u\n",
class_idx, c->head, c->tail, c->count);
// Check alignment of chain head
if (((uintptr_t)c->head % blk) != 0) {
fprintf(stderr, "[SPLICE_CORRUPT] Chain head %p misaligned (blk=%zu offset=%zu)!\n",
c->head, blk, (uintptr_t)c->head % blk);
fprintf(stderr, "[SPLICE_CORRUPT] Corruption detected BEFORE writing to TLS!\n");
abort();
}
}
if (c->tail) {
@@ -83,18 +73,23 @@ static inline void trc_splice_to_sll(int class_idx, TinyRefillChain* c,
}
static inline int trc_refill_guard_enabled(void) {
#if HAKMEM_BUILD_RELEASE
return 0; // Always disabled in release builds
#else
// FIX: Allow runtime override even in release builds for debugging
static int g_trc_guard = -1;
if (__builtin_expect(g_trc_guard == -1, 0)) {
const char* env = getenv("HAKMEM_TINY_REFILL_FAILFAST");
#if HAKMEM_BUILD_RELEASE
// Release: Default OFF, but allow explicit enable
g_trc_guard = (env && *env && *env != '0') ? 1 : 0;
#else
// Debug: Default ON, but allow explicit disable
g_trc_guard = (env && *env) ? ((*env != '0') ? 1 : 0) : 1;
fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s\n", g_trc_guard, env ? env : "(null)");
#endif
fprintf(stderr, "[TRC_GUARD] failfast=%d env=%s mode=%s\n",
g_trc_guard, env ? env : "(null)",
HAKMEM_BUILD_RELEASE ? "release" : "debug");
fflush(stderr);
}
return g_trc_guard;
#endif
}
static inline int trc_ptr_is_valid(uintptr_t base, uintptr_t limit, size_t blk, const void* node) {
@@ -188,12 +183,8 @@ static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
}
// FIX: Use carved counter (monotonic) instead of used (which decrements on free)
// Effective stride: account for Tiny header when enabled (classes 0..6)
#if HAKMEM_TINY_HEADER_CLASSIDX
size_t stride = (bs == 1024 ? bs : (bs + 1));
#else
// Caller passes bs as the effective stride already (includes header when enabled)
size_t stride = bs;
#endif
uint8_t* cursor = base + ((size_t)meta->carved * stride);
void* head = (void*)cursor;
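
Standalone sketch of the carve-cursor math from the hunk above: the caller now passes 'bs' as the
effective stride (class size plus the 1-byte header for classes 0..6), so the n-th block sits at
base + n*stride and is stride-aligned rather than power-of-two aligned (e.g. 0x1000, 0x1201, 0x1402
for a 513-byte stride). The trimmed struct and function name are illustrative, not the real
trc_linear_carve() signature.

#include <stddef.h>
#include <stdint.h>

typedef struct { uint32_t carved; uint32_t capacity; } CarveMetaSketch;

static void* carve_next_block(uint8_t* base, size_t stride, CarveMetaSketch* meta) {
    if (meta->carved >= meta->capacity) return NULL;          // slab fully carved
    uint8_t* cursor = base + ((size_t)meta->carved * stride); // carved is monotonic,
    meta->carved++;                                           // never decremented on free
    return (void*)cursor;
}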