Tiny C7(1KB) SEGV triage hardening: always-on lightweight free-time guards for headerless class7 in both hak_tiny_free_with_slab and superslab free path (alignment/range check, fail-fast via SIGUSR2). Leave C7 P0/direct-FC gated OFF by default. Add docs/TINY_C7_1KB_SEGV_TRIAGE.md for Claude with repro matrix, hypotheses, instrumentation and acceptance criteria.

This commit is contained in:
Moe Charm (CI)
2025-11-10 01:59:11 +09:00
parent 94e7d54a17
commit d55ee48459
5 changed files with 112 additions and 3 deletions

View File

@ -109,7 +109,9 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
return; return;
} }
// Optional: cross-lookup TinySlab owner and detect class mismatch early // Optional: cross-lookup TinySlab owner and detect class mismatch early
if (__builtin_expect(g_tiny_safe_free, 0)) { // Class7(1KB)はヘッダ無しのため、releaseでも軽量ガードを常時適用
// それ以外は従来どおり g_tiny_safe_free で切替
if (__builtin_expect(g_tiny_safe_free || class_idx == 7, 0)) {
TinySlab* ts = hak_tiny_owner_slab(ptr); TinySlab* ts = hak_tiny_owner_slab(ptr);
if (ts) { if (ts) {
int ts_cls = ts->class_idx; int ts_cls = ts->class_idx;
@ -130,7 +132,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
return; return;
} }
TinySlabMeta* meta = &ss->slabs[slab_idx]; TinySlabMeta* meta = &ss->slabs[slab_idx];
if (__builtin_expect(g_tiny_safe_free, 0)) { if (__builtin_expect(g_tiny_safe_free || class_idx == 7, 0)) {
size_t blk = g_tiny_class_sizes[class_idx]; size_t blk = g_tiny_class_sizes[class_idx];
uint8_t* base = tiny_slab_base_for(ss, slab_idx); uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base; uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
@ -143,7 +145,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (range_ok) code |= 0x1u; if (range_ok) code |= 0x1u;
uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr); uintptr_t aux = tiny_remote_pack_diag(code, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux); tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)class_idx, ptr, aux);
if (g_tiny_safe_free_strict) { raise(SIGUSR2); return; } if (g_tiny_safe_free_strict || class_idx == 7) { raise(SIGUSR2); return; }
return; return;
} }
} }

View File

@ -242,6 +242,8 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
if (!tls->ss) { if (!tls->ss) {
// Try to obtain a SuperSlab for this class // Try to obtain a SuperSlab for this class
if (superslab_refill(class_idx) == NULL) return 0; if (superslab_refill(class_idx) == NULL) return 0;
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
tls = &g_tls_slabs[class_idx];
} }
TinySlabMeta* meta = tls->meta; TinySlabMeta* meta = tls->meta;
if (!meta) return 0; if (!meta) return 0;
@ -291,6 +293,8 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
} }
// Need another slab with space // Need another slab with space
if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break; if (__builtin_expect(superslab_refill(class_idx) == NULL, 0)) break;
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
tls = &g_tls_slabs[class_idx];
meta = tls->meta; // refresh after refill meta = tls->meta; // refresh after refill
} }
return taken; return taken;
@ -323,6 +327,8 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
} else { } else {
// Move to another slab with space // Move to another slab with space
if (superslab_refill(class_idx) == NULL) break; if (superslab_refill(class_idx) == NULL) break;
// CRITICAL FIX: Reload tls pointer after superslab_refill() binds new slab
tls = &g_tls_slabs[class_idx];
meta = tls->meta; // refresh after refill meta = tls->meta; // refresh after refill
continue; continue;
} }

View File

@ -41,6 +41,16 @@ static inline int p0_should_log(void) {
} }
static inline int sll_refill_batch_from_ss(int class_idx, int max_take) { static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
// Conservative guard: class7(1KB) uses legacy path by default until fully stabilized.
// Opt-in via HAKMEM_TINY_P0_C7_ENABLE=1
if (__builtin_expect(class_idx == 7, 0)) {
static int c7_en = -1;
if (c7_en == -1) {
const char* e = getenv("HAKMEM_TINY_P0_C7_ENABLE");
c7_en = (e && *e && *e != '0') ? 1 : 0;
}
if (!c7_en) return 0;
}
// Runtime A/B kill switch (defensive). Set HAKMEM_TINY_P0_DISABLE=1 to bypass P0 path. // Runtime A/B kill switch (defensive). Set HAKMEM_TINY_P0_DISABLE=1 to bypass P0 path.
do { do {
static int g_p0_disable = -1; static int g_p0_disable = -1;

View File

@ -77,6 +77,23 @@ static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss) {
} }
#endif // !HAKMEM_BUILD_RELEASE #endif // !HAKMEM_BUILD_RELEASE
// Lightweight guard always-on for class7 (headerless, 1024B): prevent corrupted pointer writes in release
if (__builtin_expect(ss->size_class == 7, 0)) {
size_t blk = g_tiny_class_sizes[ss->size_class];
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)ptr - (uintptr_t)base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
if (!align_ok || !range_ok) {
uintptr_t aux = tiny_remote_pack_diag(0xA107u, ss_base, ss_size, (uintptr_t)ptr);
tiny_debug_ring_record(TINY_RING_EVENT_REMOTE_INVALID, (uint16_t)ss->size_class, ptr, aux);
// Fail-fast in class7 to avoid silent SLL/freelist corruption
raise(SIGUSR2);
return;
}
}
// Phase 6.23: Same-thread check // Phase 6.23: Same-thread check
uint32_t my_tid = tiny_self_u32(); uint32_t my_tid = tiny_self_u32();
const int debug_guard = g_debug_remote_guard; const int debug_guard = g_debug_remote_guard;

View File

@ -0,0 +1,74 @@
TINY 1KB (class7) SEGV Triage Plan
==================================
Scope
- Reproducible SEGV on fixed-size 1KB bench: `./bench_fixed_size_hakmem 200000 1024 128`
- The SEGV persists even with Direct-FC OFF, so the fault is likely in the non-direct P0 path or the legacy refill path.
- Goal: isolate the failing path, capture a backtrace, prove the root cause, and patch with minimal deltas.
Quick Repro Matrix
- Release (baseline):
- `./build.sh release bench_fixed_size_hakmem`
- `./bench_fixed_size_hakmem 200000 1024 128` → SEGV
- Disable P0 (all classes):
- `HAKMEM_TINY_P0_DISABLE=1 ./bench_fixed_size_hakmem 200000 1024 128` → record whether the SEGV still reproduces
- Disable remote drain:
- `HAKMEM_TINY_P0_NO_DRAIN=1 ./bench_fixed_size_hakmem 200000 1024 128` → Check
- Assume 1T (disable remote side-table):
- `HAKMEM_TINY_ASSUME_1T=1 ./bench_fixed_size_hakmem 200000 1024 128` → Check
Debug Build + Guards
1) Build debug flavor
- `./build.sh debug bench_fixed_size_hakmem`
2) Strong safety/guards
- `export HAKMEM_TINY_SAFE_FREE_STRICT=1`
- `export HAKMEM_TINY_DEBUG_REMOTE_GUARD=1`
- `export HAKMEM_INVALID_FREE_LOG=1`
- `export HAKMEM_TINY_RF_FORCE_NOTIFY=1`
3) Run under gdb
- `gdb --args ./bench_fixed_size_hakmem 200000 1024 128`
- `(gdb) run`
- On crash: `(gdb) bt`, `(gdb) frame 0`, `(gdb) p/x *meta`, `(gdb) p tls->slab_idx`, `(gdb) p tls->ss`, `(gdb) p meta->used`, `(gdb) p meta->carved`, `(gdb) p meta->capacity`
Hypotheses (ranked)
1) Capacity/stride mismatch in class7 carve
- class7 uses stride=1024 (no 1B header). Any code calculating with `bs = class_size + 1` will overstep.
- Check: `superslab_init_slab()` capacity, and any linear carve helper uses the same stride consistently.
2) TLS slab switch with stale pointers (already fixed for P0 direct path; check legacy/P0-general)
- After `superslab_refill()`, ensure `tls = &g_tls_slabs[c]; meta = tls->meta;` reloaded before counters/linear carve.
3) Remote drain corrupts freelist
- Verify sentinel cleared; ensure drain happens before freelist pop; check class7 path uses same ordering.
Files to Inspect
- `core/tiny_superslab_alloc.inc.h` (superslab_refill, adopt_bind_if_safe, stride/capacity)
- `core/hakmem_tiny_refill.inc.h` (legacy SLL refill, carve/pop ordering, bounds checks)
- `core/hakmem_tiny_refill_p0.inc.h` (P0 general path — C7 is currently gated OFF for direct-FC; confirm the P0 batch path is not entered for C7)
- `core/superslab/superslab_inline.h` (remote drain, sentinel guard)
Instrumentation to Add (debug-only)
- In `superslab_init_slab(ss, idx, class_size, tid)`:
- Compute `stride = class_size + (class_idx != 7 ? 1 : 0)`; assert `meta->capacity == usable/stride`.
- In linear carve path (legacy + P0-general):
- Before write: assert `meta->carved < meta->capacity`; compute base and assert `ptr < slab_base+usable`.
- After `superslab_refill()` in any loop: rebind `tls/meta` unconditionally.
Bisect Switches
- Kill P0 entirely: `HAKMEM_TINY_P0_DISABLE=1`
- Skip remote drain: `HAKMEM_TINY_P0_NO_DRAIN=1`
- Assume ST mode: `HAKMEM_TINY_ASSUME_1T=1`
- Disable simplified refills (if applicable): `HAKMEM_TINY_SIMPLE_REFILL=0` (add if not present)
Patch Strategy (expected minimal fix)
1) Make class7 stride consistently 1024 in all carve paths (no +1 header). Audit bs computations.
2) Ensure tls/meta rebind after every `superslab_refill()` in non-direct paths.
3) Enforce drain-before-pop ordering and sentinel clear.
Acceptance Criteria
- `./bench_fixed_size_hakmem 200000 1024 128` passes 3/3 without SEGV.
- Debug counters show `active_delta == taken` (no mismatch).
- No invalid-free logs under STRICT mode.
Notes
- C7 DirectFC is already defaulted to OFF, and P0 entry for C7 is guarded unless explicitly enabled via `HAKMEM_TINY_P0_C7_ENABLE=1`.
- Focus on legacy/P0-general carve paths for C7.