diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c
index 7ef58638..833e2927 100644
--- a/core/hakmem_tiny.c
+++ b/core/hakmem_tiny.c
@@ -372,6 +372,10 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr) {
     if (slab_idx < 0) {
         tiny_failfast_abort_ptr("alloc_ret_slabidx", ss, slab_idx, ptr, "slab_idx_mismatch");
     } else {
+        // Fail-Fast: class vs SuperSlab size_class must be consistent.
+        if (ss->size_class != cls) {
+            tiny_failfast_abort_ptr("alloc_ret_cls_mismatch", ss, slab_idx, ptr, "class_mismatch");
+        }
         size_t blk = g_tiny_class_sizes[cls];
         uintptr_t base = (uintptr_t)tiny_slab_base_for(ss, slab_idx);
         uintptr_t delta = (uintptr_t)ptr - base;
@@ -856,6 +860,10 @@ SuperSlab* ss_partial_adopt(int class_idx) {
 }

 static inline void tiny_tls_bind_slab(TinyTLSSlab* tls, SuperSlab* ss, int slab_idx) {
+    // Canonical binding:
+    //   - ss->size_class defines the block size for this SuperSlab
+    //   - slab_idx is the owning slab index within ss
+    //   - slab_base is ALWAYS derived from tiny_slab_base_for(ss, slab_idx)
     tls->ss = ss;
     tls->slab_idx = (uint8_t)slab_idx;
     tls->meta = &ss->slabs[slab_idx];
diff --git a/core/hakmem_tiny_refill.inc.h b/core/hakmem_tiny_refill.inc.h
index c77acb7a..fc1b4969 100644
--- a/core/hakmem_tiny_refill.inc.h
+++ b/core/hakmem_tiny_refill.inc.h
@@ -220,7 +220,8 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) {
             ss_active_inc(tls->ss);
         } else if (meta->used < meta->capacity) {
             void* slab_start = slab_data_start(tls->ss, tls->slab_idx);
-            if (tls->slab_idx == 0) slab_start = (char*)slab_start + 1024;
+            // ULTRATHINK FIX: Use aligned offset (2048) for slab 0
+            if (tls->slab_idx == 0) slab_start = (char*)slab_start + 2048;
             p = (char*)slab_start + ((size_t)meta->used * bs);
             meta->used++;
             // Track active blocks reserved into TLS SLL
@@ -274,7 +275,8 @@ static inline void* superslab_tls_bump_fast(int class_idx) {
     if (chunk > avail) chunk = avail;
     size_t bs = g_tiny_class_sizes[tls->ss->size_class];
     void* slab_start = slab_data_start(tls->ss, tls->slab_idx);
-    if (tls->slab_idx == 0) slab_start = (char*)slab_start + 1024;
+    // ULTRATHINK FIX: Use aligned offset (2048) for slab 0
+    if (tls->slab_idx == 0) slab_start = (char*)slab_start + 2048;
     uint8_t* base = tls->slab_base ? tls->slab_base : tiny_slab_base_for(tls->ss, tls->slab_idx);
     uint8_t* start = base + ((size_t)used * bs);
     // Reserve the chunk once in header (keeps remote-free accounting valid)
diff --git a/core/hakmem_tiny_superslab.c b/core/hakmem_tiny_superslab.c
index 0f8ef63c..62a181d3 100644
--- a/core/hakmem_tiny_superslab.c
+++ b/core/hakmem_tiny_superslab.c
@@ -538,15 +538,13 @@ void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_
         return;
     }

-    // Get slab data region (skip header in first slab)
-    void* slab_start = slab_data_start(ss, slab_idx);
-    if (slab_idx == 0) {
-        // First slab: skip SuperSlab header (64B) + metadata (512B) = 576B
-        slab_start = (char*)slab_start + 1024;  // Align to 1KB for safety
-    }
-
-    // Calculate capacity
-    size_t usable_size = (slab_idx == 0) ? (SLAB_SIZE - 1024) : SLAB_SIZE;
+    // Calculate capacity using the canonical tiny_slab_base_for() layout:
+    //   - slab_data_start(ss, slab_idx) = SuperSlab base + slab_idx * SLAB_SIZE
+    //   - tiny_slab_base_for(ss, 0)     = SuperSlab base + SUPERSLAB_SLAB0_DATA_OFFSET
+    //   - tiny_slab_base_for(ss, i>0)   = slab_data_start (no gap)
+    //
+    // Phase 6-2.5: Use constants from hakmem_tiny_superslab_constants.h
+    size_t usable_size = (slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE
+                                         : SUPERSLAB_SLAB_USABLE_SIZE;
     int capacity = (int)(usable_size / block_size);

     // Phase 6.24: Lazy freelist initialization
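The capacity change is easy to sanity-check in isolation. The sketch below reproduces the arithmetic with the constants from hakmem_tiny_superslab_constants.h; the class-7 block size (1024 bytes) is the only class size assumed here.

#include <stdio.h>
#include <stddef.h>

#define SLAB_SIZE (64 * 1024)
#define SUPERSLAB_SLAB0_DATA_OFFSET 2048
#define SUPERSLAB_SLAB0_USABLE_SIZE (SLAB_SIZE - SUPERSLAB_SLAB0_DATA_OFFSET)
#define SUPERSLAB_SLAB_USABLE_SIZE  SLAB_SIZE

int main(void) {
    size_t block_size = 1024;  /* class 7: the worst case for alignment */
    size_t usable0 = SUPERSLAB_SLAB0_USABLE_SIZE;
    size_t usable  = SUPERSLAB_SLAB_USABLE_SIZE;
    printf("slab 0: usable=%zu capacity=%zu\n", usable0, usable0 / block_size); /* 63488, 62 */
    printf("slab i: usable=%zu capacity=%zu\n", usable,  usable  / block_size); /* 65536, 64 */
    return 0;
}

Slab 0 thus holds 62 class-7 blocks instead of a plain slab's 64; the two lost blocks pay for the header plus its alignment padding.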
diff --git a/core/hakmem_tiny_superslab.h b/core/hakmem_tiny_superslab.h
index 765634a7..bffdbdac 100644
--- a/core/hakmem_tiny_superslab.h
+++ b/core/hakmem_tiny_superslab.h
@@ -17,6 +17,7 @@ #include
 #include "tiny_debug_ring.h"
 #include "tiny_remote.h"
+#include "hakmem_tiny_superslab_constants.h"  // Phase 6-2.5: Centralized layout constants

 // Debug instrumentation flags (defined in hakmem_tiny.c)
 extern int g_debug_remote_guard;
@@ -35,7 +36,8 @@ uint32_t tiny_remote_drain_threshold(void);
 #define SUPERSLAB_LG_MIN     20  // lg(1MB)
 #define SUPERSLAB_LG_DEFAULT 21  // Default: 2MB (syscall reduction, ACE will adapt)

-#define SLAB_SIZE (64 * 1024)  // 64KB per slab (fixed)
+// Phase 6-2.5: SLAB_SIZE now defined in hakmem_tiny_superslab_constants.h
+// #define SLAB_SIZE (64 * 1024)  // 64KB per slab (fixed)

 // Legacy defines (kept for backward compatibility, use lg_size instead)
 #define SUPERSLAB_SIZE SUPERSLAB_SIZE_MAX  // Default to 2MB (syscall reduction)
@@ -236,7 +238,10 @@ static inline void* slab_data_start(SuperSlab* ss, int slab_idx) {

 static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx) {
     uint8_t* base = (uint8_t*)slab_data_start(ss, slab_idx);
-    if (slab_idx == 0) base += 1024;
+    // Phase 6-2.5 FIX: Use the SUPERSLAB_SLAB0_DATA_OFFSET constant.
+    // sizeof(SuperSlab) = 1088, aligned up to the next 1024 boundary = 2048.
+    // This ensures proper alignment for class 7 (1024-byte blocks).
+    if (slab_idx == 0) base += SUPERSLAB_SLAB0_DATA_OFFSET;
     return base;
 }
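To make the "single source of truth" concrete, here is a minimal stand-alone sketch of the canonical derivation. sketch_slab_base_for is a hypothetical rename for illustration, and the 2MB-aligned address is an assumption, not taken from the allocator.

#include <stdint.h>
#include <stddef.h>
#include <assert.h>

#define SLAB_SIZE (64 * 1024)
#define SUPERSLAB_SLAB0_DATA_OFFSET 2048

static uint8_t* sketch_slab_base_for(uint8_t* ss_base, int slab_idx) {
    uint8_t* base = ss_base + (size_t)slab_idx * SLAB_SIZE;  /* slab_data_start() */
    if (slab_idx == 0) base += SUPERSLAB_SLAB0_DATA_OFFSET;  /* skip header + padding */
    return base;
}

int main(void) {
    uint8_t* ss_base = (uint8_t*)(uintptr_t)(2UL << 20);  /* assumed 2MB-aligned SuperSlab */
    assert(sketch_slab_base_for(ss_base, 0) == ss_base + 2048);        /* slab 0: skips 2048  */
    assert(sketch_slab_base_for(ss_base, 1) == ss_base + SLAB_SIZE);   /* slab i>0: no gap    */
    assert(((uintptr_t)sketch_slab_base_for(ss_base, 0) % 1024) == 0); /* class 7 stays aligned */
    return 0;
}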
diff --git a/core/hakmem_tiny_superslab_constants.h b/core/hakmem_tiny_superslab_constants.h
new file mode 100644
index 00000000..021bb687
--- /dev/null
+++ b/core/hakmem_tiny_superslab_constants.h
@@ -0,0 +1,59 @@
+// hakmem_tiny_superslab_constants.h - SuperSlab Layout Constants
+// Purpose: Centralize all SuperSlab layout magic numbers
+// Phase 6-2.5: Created to fix the sizeof(SuperSlab) vs hardcoded offset mismatch
+
+#ifndef HAKMEM_TINY_SUPERSLAB_CONSTANTS_H
+#define HAKMEM_TINY_SUPERSLAB_CONSTANTS_H
+
+// ============================================================================
+// SuperSlab Layout Constants
+// ============================================================================
+
+// Size of each slab within a SuperSlab (fixed, never changes)
+#define SLAB_SIZE (64 * 1024)  // 64KB per slab
+
+// SuperSlab struct size (as of Phase 6-2.5)
+// Actual value: sizeof(SuperSlab) = 1088 bytes
+// This includes: magic, lg_size, size_class, total_active_blocks,
+// remote_heads[], slabs[], slab_listed[], etc.
+#define SUPERSLAB_HEADER_SIZE 1088
+
+// Slab 0 data offset (CRITICAL: must be aligned to the largest block size)
+// Phase 6-2.5 FIX: Changed from 1024 to 2048
+//
+// Why 2048?
+// - sizeof(SuperSlab) = 1088 bytes
+// - Largest block size = 1024 bytes (class 7)
+// - Must round up to the next 1024-byte boundary: (1088 + 1023) & ~1023 = 2048
+//
+// Layout:
+//   [0..1087]     SuperSlab header (1088 bytes)
+//   [1088..2047]  Padding (960 bytes, unused)
+//   [2048..65535] Slab 0 data (63488 bytes = 64KB - 2048)
+//
+// Previous value (1024) caused:
+// - Slab 0 data overlapped the last 64 bytes of the SuperSlab header
+//   (sizeof(SuperSlab) = 1088 > 1024), corrupting metadata
+// - Note that skipping exactly sizeof(SuperSlab) would not work either:
+//   1088 % 1024 != 0, which would misalign class 7 (1024-byte) blocks
#define SUPERSLAB_SLAB0_DATA_OFFSET 2048
+
+// Slab 0 usable size (for capacity calculation)
+#define SUPERSLAB_SLAB0_USABLE_SIZE (SLAB_SIZE - SUPERSLAB_SLAB0_DATA_OFFSET)  // 63488 bytes
+
+// Regular slab (i > 0) usable size
+#define SUPERSLAB_SLAB_USABLE_SIZE SLAB_SIZE  // 65536 bytes
+
+// ============================================================================
+// Validation (compile-time checks)
+// ============================================================================
+
+// Ensure SLAB0_DATA_OFFSET is aligned to the largest block size (1024)
+#if (SUPERSLAB_SLAB0_DATA_OFFSET % 1024) != 0
+#error "SUPERSLAB_SLAB0_DATA_OFFSET must be 1024-byte aligned for class 7"
+#endif
+
+// Ensure SLAB0_DATA_OFFSET is large enough to contain the SuperSlab header
+#if SUPERSLAB_SLAB0_DATA_OFFSET < SUPERSLAB_HEADER_SIZE
+#error "SUPERSLAB_SLAB0_DATA_OFFSET must be >= sizeof(SuperSlab)"
+#endif
+
+#endif  // HAKMEM_TINY_SUPERSLAB_CONSTANTS_H
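The round-up rule in the comment, (1088 + 1023) & ~1023 = 2048, generalizes to any header size and power-of-two block size. A minimal sketch, with align_up as a hypothetical helper (the real header simply hardcodes the result):

#include <stdio.h>
#include <stddef.h>

/* Round n up to the next multiple of a (a must be a power of two). */
static size_t align_up(size_t n, size_t a) {
    return (n + a - 1) & ~(a - 1);
}

int main(void) {
    size_t header = 1088;  /* sizeof(SuperSlab) per the patch */
    size_t block  = 1024;  /* largest tiny class (class 7) */
    printf("%zu\n", align_up(header, block));  /* prints 2048 */
    return 0;
}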
diff --git a/core/tiny_superslab_alloc.inc.h b/core/tiny_superslab_alloc.inc.h
index 0defca86..6549697e 100644
--- a/core/tiny_superslab_alloc.inc.h
+++ b/core/tiny_superslab_alloc.inc.h
@@ -69,16 +69,10 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
     // Phase 6.24: Linear allocation mode (freelist == NULL)
     // This avoids the 4000-8000 cycle cost of building freelist on init
     if (meta->freelist == NULL && meta->used < meta->capacity) {
-        // Linear allocation: sequential memory access (cache-friendly!)
+        // Linear allocation: use canonical tiny_slab_base_for() only
         size_t block_size = g_tiny_class_sizes[ss->size_class];
-        void* slab_start = slab_data_start(ss, slab_idx);
-
-        // First slab: skip SuperSlab header
-        if (slab_idx == 0) {
-            slab_start = (char*)slab_start + 1024;
-        }
-
-        void* block = (char*)slab_start + (meta->used * block_size);
+        uint8_t* base = tiny_slab_base_for(ss, slab_idx);
+        void* block = (void*)(base + ((size_t)meta->used * block_size));
         meta->used++;
         tiny_remote_track_on_alloc(ss, slab_idx, block, "linear_alloc", 0);
         tiny_remote_assert_not_remote(ss, slab_idx, block, "linear_alloc_ret", 0);
@@ -436,6 +430,23 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) {
     TinySlabMeta* meta = tls->meta;
     int slab_idx = tls->slab_idx;
     if (meta && slab_idx >= 0 && tls->ss) {
+        // CRITICAL: Verify class consistency BEFORE using tls->ss.
+        // If tls->ss->size_class != class_idx, unbind and refill.
+        if (tls->ss->size_class != class_idx) {
+            // Class mismatch: TLS is bound to the wrong SuperSlab.
+            // This happens when the TLS was previously bound to a different class.
+            tls->ss = NULL;
+            tls->meta = NULL;
+            tls->slab_idx = -1;
+            tls->slab_base = NULL;
+            meta = NULL;  // Force refill path below
+        } else {
+            // Ensure the TLS view is consistent with the canonical slab_base
+            uint8_t* canonical = tiny_slab_base_for(tls->ss, slab_idx);
+            if (tls->slab_base != canonical) {
+                tls->slab_base = canonical;
+            }
+        }
         // A/B: Relaxed read for remote head presence check
         static int g_alloc_remote_relax = -1;  // env: HAKMEM_TINY_ALLOC_REMOTE_RELAX=1 → relaxed
         if (__builtin_expect(g_alloc_remote_relax == -1, 0)) {
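The unbind-on-mismatch guard above can be exercised on its own. A minimal sketch with pared-down stand-ins: MiniSS, MiniTLS, and tls_check_class are hypothetical, and only the field names mirror the patch.

#include <stdint.h>
#include <stddef.h>

typedef struct { int size_class; } MiniSS;
typedef struct {
    MiniSS*  ss;
    void*    meta;
    int      slab_idx;
    uint8_t* slab_base;
} MiniTLS;

/* Returns 1 if the existing binding is usable for class_idx, 0 if it was dropped. */
static int tls_check_class(MiniTLS* tls, int class_idx) {
    if (tls->ss && tls->ss->size_class != class_idx) {
        /* Stale binding from a previous class: unbind and force the refill path. */
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_idx = -1;
        tls->slab_base = NULL;
        return 0;
    }
    return tls->ss != NULL;
}

int main(void) {
    MiniSS ss7 = { 7 };
    MiniTLS tls = { &ss7, NULL, 0, NULL };
    if (!tls_check_class(&tls, 7)) return 1;  /* same class: binding kept */
    if (tls_check_class(&tls, 3))  return 1;  /* class mismatch: unbound  */
    return (tls.ss == NULL) ? 0 : 1;          /* caller now takes the refill path */
}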
"[ULTRA_53_DEBUG] offset from ss=%zu (0x%zx)\n", offset_from_ss, offset_from_ss); + fprintf(stderr, "[ULTRA_53_DEBUG] expected offset=%zu (0x%zx)\n", expected_offset, expected_offset); + fprintf(stderr, "[ULTRA_53_DEBUG] difference=%zd bytes\n", + (ssize_t)offset_from_ss - (ssize_t)expected_offset); + } + meta->used++; + + // Fail-Fast: self-check(デバッグ時のみ有効) + if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) { + uintptr_t base_ss = (uintptr_t)tls->ss; + size_t ss_size = (size_t)1ULL << tls->ss->lg_size; + uintptr_t limit_ss = base_ss + ss_size; + uintptr_t p = (uintptr_t)block; + size_t off = (p >= base_ss) ? (size_t)(p - base_ss) : 0; + int in_range = (p >= base_ss) && (p < limit_ss); + int aligned = ((p - (uintptr_t)base) % block_size) == 0; + int idx_ok = (tls->slab_idx >= 0) && (tls->slab_idx < ss_slabs_capacity(tls->ss)); + if (!in_range || !aligned || !idx_ok || meta->used > (uint32_t)meta->capacity) { + tiny_failfast_abort_ptr("alloc_ret_align", + tls->ss, + tls->slab_idx, + block, + !in_range ? "out_of_range" + : (!aligned ? "misaligned" + : (!idx_ok ? "bad_slab_idx" + : "over_capacity"))); + } + } + // Track active blocks in SuperSlab for conservative reclamation ss_active_inc(tls->ss); // Route: slab linear