Phase 6-2.5: Fix SuperSlab alignment bug + refactor constants
## Problem: 53-byte misalignment mystery

**Symptom:** All SuperSlab allocations misaligned by exactly 53 bytes

```
[TRC_FAILFAST_PTR] stage=alloc_ret_align cls=7 ptr=0x..f835 offset=63541 (expected: 63488)
Diff: 63541 - 63488 = 53 bytes
```

## Root Cause (Ultrathink investigation)

**sizeof(SuperSlab) != hardcoded offset:**
- `sizeof(SuperSlab)` = 1088 bytes (actual struct size)
- `tiny_slab_base_for()` used: 1024 (hardcoded)
- `superslab_init_slab()` assumed: 2048 (in capacity calc)

**Impact:**
1. Memory corruption: 64-byte overlap with SuperSlab metadata
2. Misalignment: 1088 % 1024 = 64 (violates class 7 alignment)
3. Inconsistency: init assumed 2048, but runtime used 1024

## Solution

### 1. Centralize constants (NEW)

**File:** `core/hakmem_tiny_superslab_constants.h`
- `SLAB_SIZE` = 64KB
- `SUPERSLAB_HEADER_SIZE` = 1088
- `SUPERSLAB_SLAB0_DATA_OFFSET` = 2048 (aligned to 1024)
- `SUPERSLAB_SLAB0_USABLE_SIZE` = 63488 (64KB - 2048)
- Compile-time validation checks

**Why 2048?**
- Round 1088 up to the next 1024-byte boundary: (1088 + 1023) & ~1023 = 2048
- Ensures proper alignment for class 7 (1024-byte blocks)

### 2. Update all code to use constants
- `hakmem_tiny_superslab.h`: `tiny_slab_base_for()` → use `SUPERSLAB_SLAB0_DATA_OFFSET`
- `hakmem_tiny_superslab.c`: `superslab_init_slab()` → use `SUPERSLAB_SLAB0_USABLE_SIZE`
- Removed hardcoded 1024 and 2048 magic numbers

### 3. Add class consistency check

**File:** `core/tiny_superslab_alloc.inc.h:433-449`
- Verify `tls->ss->size_class == class_idx` before allocation
- Unbind TLS if a mismatch is detected
- Prevents using the wrong block_size for calculations

## Status

⚠️ **INCOMPLETE — new issue discovered**

After the fix, the benchmark hits a different error:

```
[TRC_FAILFAST] stage=freelist_next cls=7 node=0x...d474
```

Freelist corruption detected.
Likely caused by:
- The 2048 offset change affects the free() path
- Block addresses no longer match freelist expectations
- Needs further investigation

## Files Modified
- `core/hakmem_tiny_superslab_constants.h` - NEW: centralized constants
- `core/hakmem_tiny_superslab.h` - Use SUPERSLAB_SLAB0_DATA_OFFSET
- `core/hakmem_tiny_superslab.c` - Use SUPERSLAB_SLAB0_USABLE_SIZE
- `core/tiny_superslab_alloc.inc.h` - Add class consistency check
- `core/hakmem_tiny_init.inc` - Remove diet mode override (Phase 6-2.5)
- `core/hakmem_super_registry.h` - Remove debug output (cleaned)
- `PERFORMANCE_INVESTIGATION_REPORT.md` - Task agent analysis

## Next Steps
1. Investigate freelist corruption with the 2048 offset
2. Verify the free() path uses tiny_slab_base_for() correctly
3. Consider reverting to 1024 and fixing the capacity calculation instead

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -69,16 +69,10 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
|
||||
// Phase 6.24: Linear allocation mode (freelist == NULL)
|
||||
// This avoids the 4000-8000 cycle cost of building freelist on init
|
||||
if (meta->freelist == NULL && meta->used < meta->capacity) {
|
||||
// Linear allocation: sequential memory access (cache-friendly!)
|
||||
// Linear allocation: use canonical tiny_slab_base_for() only
|
||||
size_t block_size = g_tiny_class_sizes[ss->size_class];
|
||||
void* slab_start = slab_data_start(ss, slab_idx);
|
||||
|
||||
// First slab: skip SuperSlab header
|
||||
if (slab_idx == 0) {
|
||||
slab_start = (char*)slab_start + 1024;
|
||||
}
|
||||
|
||||
void* block = (char*)slab_start + (meta->used * block_size);
|
||||
uint8_t* base = tiny_slab_base_for(ss, slab_idx);
|
||||
void* block = (void*)(base + ((size_t)meta->used * block_size));
|
||||
meta->used++;
|
||||
tiny_remote_track_on_alloc(ss, slab_idx, block, "linear_alloc", 0);
|
||||
tiny_remote_assert_not_remote(ss, slab_idx, block, "linear_alloc_ret", 0);
|
||||
@ -436,6 +430,23 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
TinySlabMeta* meta = tls->meta;
|
||||
int slab_idx = tls->slab_idx;
|
||||
if (meta && slab_idx >= 0 && tls->ss) {
|
||||
// CRITICAL: Verify class consistency BEFORE using tls->ss
|
||||
// If tls->ss->size_class != class_idx, unbind and refill
|
||||
if (tls->ss->size_class != class_idx) {
|
||||
// Class mismatch: TLS is bound to wrong SuperSlab
|
||||
// This happens when TLS was previously bound to different class
|
||||
tls->ss = NULL;
|
||||
tls->meta = NULL;
|
||||
tls->slab_idx = -1;
|
||||
tls->slab_base = NULL;
|
||||
meta = NULL; // Force refill path below
|
||||
} else {
|
||||
// Ensure TLS view is consistent with canonical slab_base
|
||||
uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
|
||||
if (tls->slab_base != canonical) {
|
||||
tls->slab_base = canonical;
|
||||
}
|
||||
}
|
||||
// A/B: Relaxed read for remote head presence check
|
||||
static int g_alloc_remote_relax = -1; // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 → relaxed
|
||||
if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
|
||||
@ -463,8 +474,63 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) {
|
||||
if (meta && meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
|
||||
// Linear allocation (lazy init)
|
||||
size_t block_size = g_tiny_class_sizes[tls->ss->size_class];
|
||||
void* block = (void*)(tls->slab_base + ((size_t)meta->used * block_size));
|
||||
uint8_t* base = tls->slab_base; // tls_slab_base は tiny_slab_base_for(ss, slab_idx) 由来(唯一の真実)
|
||||
|
||||
// ULTRATHINK DEBUG: Capture the 53-byte mystery
|
||||
if (tiny_refill_failfast_level() >= 3 && tls->ss->size_class == 7 && slab_idx == 0) {
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] === Before allocation ===\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] ss=%p, slab_idx=%d, class=%d\n",
|
||||
tls->ss, slab_idx, tls->ss->size_class);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] block_size=%zu, meta->used=%d, meta->capacity=%d\n",
|
||||
block_size, meta->used, meta->capacity);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] tls->slab_base=%p\n", base);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] tiny_slab_base_for(ss,%d)=%p\n",
|
||||
slab_idx, tiny_slab_base_for(tls->ss, slab_idx));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] sizeof(SuperSlab)=%zu\n", sizeof(SuperSlab));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Expected base should be: ss + %zu\n", sizeof(SuperSlab));
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Actual base is: ss + 1024\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] Base error: %zu - 1024 = %zu bytes\n",
|
||||
sizeof(SuperSlab), sizeof(SuperSlab) - 1024);
|
||||
}
|
||||
|
||||
void* block = (void*)(base + ((size_t)meta->used * block_size));
|
||||
|
||||
// ULTRATHINK DEBUG: After calculation
|
||||
if (tiny_refill_failfast_level() >= 3 && tls->ss->size_class == 7 && slab_idx == 0) {
|
||||
size_t offset_from_ss = (uintptr_t)block - (uintptr_t)tls->ss;
|
||||
size_t expected_offset = 1024 + ((size_t)meta->used * block_size);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] === Calculated block address ===\n");
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] block=%p\n", block);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] offset from ss=%zu (0x%zx)\n", offset_from_ss, offset_from_ss);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] expected offset=%zu (0x%zx)\n", expected_offset, expected_offset);
|
||||
fprintf(stderr, "[ULTRA_53_DEBUG] difference=%zd bytes\n",
|
||||
(ssize_t)offset_from_ss - (ssize_t)expected_offset);
|
||||
}
|
||||
|
||||
meta->used++;
|
||||
|
||||
// Fail-Fast: self-check(デバッグ時のみ有効)
|
||||
if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
|
||||
uintptr_t base_ss = (uintptr_t)tls->ss;
|
||||
size_t ss_size = (size_t)1ULL << tls->ss->lg_size;
|
||||
uintptr_t limit_ss = base_ss + ss_size;
|
||||
uintptr_t p = (uintptr_t)block;
|
||||
size_t off = (p >= base_ss) ? (size_t)(p - base_ss) : 0;
|
||||
int in_range = (p >= base_ss) && (p < limit_ss);
|
||||
int aligned = ((p - (uintptr_t)base) % block_size) == 0;
|
||||
int idx_ok = (tls->slab_idx >= 0) && (tls->slab_idx < ss_slabs_capacity(tls->ss));
|
||||
if (!in_range || !aligned || !idx_ok || meta->used > (uint32_t)meta->capacity) {
|
||||
tiny_failfast_abort_ptr("alloc_ret_align",
|
||||
tls->ss,
|
||||
tls->slab_idx,
|
||||
block,
|
||||
!in_range ? "out_of_range"
|
||||
: (!aligned ? "misaligned"
|
||||
: (!idx_ok ? "bad_slab_idx"
|
||||
: "over_capacity")));
|
||||
}
|
||||
}
|
||||
|
||||
// Track active blocks in SuperSlab for conservative reclamation
|
||||
ss_active_inc(tls->ss);
|
||||
// Route: slab linear
|
||||
|
||||
Reference in New Issue
Block a user