Files
hakmem/core/superslab/superslab_inline.h
Moe Charm (CI) 6ae0db9fd2 Fix: workset=8192 SEGV - Align slab_index_for to Box3 geometry (iteration 2)
Problem:
After Box3 geometry unification (commit 2fe970252), workset=8192 still SEGVs:
- 200K iterations:  OK
- 300K iterations:  SEGV

Root Cause (identified by ChatGPT):
Header/metadata class mismatches around 300K iterations:
- [HDR_META_MISMATCH] hdr_cls=6 meta_cls=5
- [FREE_FAST_HDR_META_MISMATCH] hdr_cls=5 meta_cls=4
- [TLS_SLL_PUSH_META_MISMATCH] cls=5 meta_cls=4

Cause: slab_index_for() geometry mismatch with Box3
- tiny_slab_base_for_geometry() (Box3):
    - Slab 0: ss + SUPERSLAB_SLAB0_DATA_OFFSET
    - Slab 1: ss + 1*SLAB_SIZE
    - Slab k: ss + k*SLAB_SIZE

- Old slab_index_for():
    rel = p - (base + SUPERSLAB_SLAB0_DATA_OFFSET);
    idx = rel / SLAB_SIZE;

- Result: Off-by-one for slab_idx > 0
    Example: tiny_slab_base_for_geometry(ss, 4) returns 0x...40000
             slab_index_for(ss, 0x...40000) returns 3 (wrong!)

Impact:
- Block allocated in "C6 slab 4" appears to be in "C5 slab 3"
- Header class_idx (C6) != meta->class_idx (C5)
- TLS SLL corruption → SEGV after extended runs

Fix: core/superslab/superslab_inline.h
======================================
Rewrite slab_index_for() as inverse of Box3 geometry:

  static inline int slab_index_for(SuperSlab* ss, void* ptr) {
      // ... bounds checks ...

      // Slab 0: special case (has metadata offset)
      if (p < base + SLAB_SIZE) {
          return 0;
      }

      // Slab 1+: simple SLAB_SIZE spacing from base
      size_t rel = p - base;  // ← Changed from (p - base - OFFSET)
      int idx = (int)(rel / SLAB_SIZE);
      return idx;
  }

Verification:
- slab_index_for(ss, tiny_slab_base_for_geometry(ss, idx)) == idx 
- Consistent for any address within slab

Test Results:
=============
workset=8192 SEGV threshold improved further:

Before this fix (after 2fe970252):
   200K iterations: OK
   300K iterations: SEGV

After this fix:
   220K iterations: OK (15.5M ops/s)
   240K iterations: SEGV (different bug)

Progress:
- Iteration 1 (2fe970252): 0 → 200K stable
- Iteration 2 (this fix):  200K → 220K stable
- Total improvement: ∞ → 220K iterations (+10% stability)

Known Issues:
- 240K+ still SEGVs (suspected: TLS SLL double-free, per ChatGPT)
- Debug builds may show TLS_SLL_PUSH FATAL double-free detection
- Requires further investigation of free path

Impact:
- No performance regression in stable range
- Header/metadata mismatch errors eliminated
- workset=256 unaffected: 60M+ ops/s maintained

Credit: Root cause analysis and fix by ChatGPT

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 07:56:06 +09:00

164 lines
5.4 KiB
C

#ifndef SUPERSLAB_INLINE_H
#define SUPERSLAB_INLINE_H
#include "superslab_types.h"
#include "../tiny_box_geometry.h" // Box 3 geometry helpers (stride/base/capacity)
// Forward declaration for unsafe remote drain used by refill/handle paths
// Implemented in hakmem_tiny_superslab.c
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta);
// Optional debug counter (defined in hakmem_tiny_superslab.c)
extern _Atomic uint64_t g_ss_active_dec_calls;
// Return maximum number of slabs for this SuperSlab based on lg_size.
static inline int ss_slabs_capacity(SuperSlab* ss)
{
if (!ss) return 0;
size_t ss_size = (size_t)1 << ss->lg_size;
return (int)(ss_size / SLAB_SIZE);
}
// Compute slab base pointer for given (ss, slab_idx).
// Box 5 wrapper: delegate to Box 3 canonical geometry helper.
static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx)
{
if (!ss || slab_idx < 0) {
return NULL;
}
return tiny_slab_base_for_geometry(ss, slab_idx);
}
// Compute slab index for a pointer inside ss.
// Box 5 wrapper: inverse of Box 3 geometry (tiny_slab_base_for_geometry).
// Layout (data regions):
// - Slab 0: [ss + SUPERSLAB_SLAB0_DATA_OFFSET, ss + SLAB_SIZE)
// - Slab 1: [ss + 1*SLAB_SIZE, ss + 2*SLAB_SIZE)
// - Slab k: [ss + k*SLAB_SIZE, ss + (k+1)*SLAB_SIZE)
static inline int slab_index_for(SuperSlab* ss, void* ptr)
{
if (!ss || !ptr) {
return -1;
}
uintptr_t base = (uintptr_t)ss;
uintptr_t p = (uintptr_t)ptr;
size_t ss_size = (size_t)1 << ss->lg_size;
// Outside overall SuperSlab range
if (p < base + SUPERSLAB_SLAB0_DATA_OFFSET || p >= base + ss_size) {
return -1;
}
// Slab 0: from first data byte up to the end of first slab
if (p < base + SLAB_SIZE) {
return 0;
}
// Slabs 1+ use simple SLAB_SIZE spacing from SuperSlab base
size_t rel = p - base;
int idx = (int)(rel / SLAB_SIZE);
if (idx < 0 || idx >= SLABS_PER_SUPERSLAB_MAX) {
return -1;
}
return idx;
}
// Simple ref helpers used by lifecycle paths.
static inline uint32_t superslab_ref_get(SuperSlab* ss)
{
return ss ? atomic_load_explicit(&ss->refcount, memory_order_acquire) : 0;
}
static inline void superslab_ref_inc(SuperSlab* ss)
{
if (ss) {
atomic_fetch_add_explicit(&ss->refcount, 1, memory_order_acq_rel);
}
}
static inline void superslab_ref_dec(SuperSlab* ss)
{
if (ss) {
uint32_t prev = atomic_fetch_sub_explicit(&ss->refcount, 1, memory_order_acq_rel);
(void)prev; // caller decides when to free; we just provide the primitive
}
}
// Ownership helpers (Box 3)
static inline int ss_owner_try_acquire(TinySlabMeta* m, uint32_t tid)
{
if (!m) return 0;
uint8_t want = (uint8_t)(tid & 0xFFu);
uint8_t expected = 0;
return __atomic_compare_exchange_n(&m->owner_tid_low, &expected, want,
false, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}
static inline void ss_owner_release(TinySlabMeta* m, uint32_t tid)
{
if (!m) return;
uint8_t expected = (uint8_t)(tid & 0xFFu);
(void)__atomic_compare_exchange_n(&m->owner_tid_low, &expected, 0u,
false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}
static inline int ss_owner_is_mine(TinySlabMeta* m, uint32_t tid)
{
if (!m) return 0;
uint8_t cur = __atomic_load_n(&m->owner_tid_low, __ATOMIC_RELAXED);
return cur == (uint8_t)(tid & 0xFFu);
}
// Active block accounting (saturating dec by 1)
static inline void ss_active_dec_one(SuperSlab* ss)
{
if (!ss) return;
atomic_fetch_add_explicit(&g_ss_active_dec_calls, 1, memory_order_relaxed);
uint32_t cur = atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed);
while (cur != 0) {
if (atomic_compare_exchange_weak_explicit(&ss->total_active_blocks,
&cur,
cur - 1u,
memory_order_acq_rel,
memory_order_relaxed)) {
return;
}
// cur updated by failed CAS; loop
}
}
// Remote push helper (Box 2):
// - Enqueue node to per-slab MPSC stack
// - Returns 1 if transition empty->nonempty, otherwise 0
// - Also decrements ss->total_active_blocks once (free completed)
static inline int ss_remote_push(SuperSlab* ss, int slab_idx, void* node)
{
if (!ss || slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX || !node) {
return -1;
}
_Atomic uintptr_t* head = &ss->remote_heads[slab_idx];
uintptr_t old_head;
uintptr_t new_head;
int transitioned = 0;
do {
old_head = atomic_load_explicit(head, memory_order_acquire);
// next ポインタは tiny_next_ptr_box / tiny_nextptr 等で扱う前提だが、
// ここでは単純に単方向リストとして積む(上位が decode する)。
*(uintptr_t*)node = old_head;
new_head = (uintptr_t)node;
} while (!atomic_compare_exchange_weak_explicit(
head, &old_head, new_head,
memory_order_release, memory_order_relaxed));
transitioned = (old_head == 0) ? 1 : 0;
atomic_fetch_add_explicit(&ss->remote_counts[slab_idx], 1, memory_order_acq_rel);
// account active block removal once per free
ss_active_dec_one(ss);
return transitioned;
}
#endif // SUPERSLAB_INLINE_H