diff --git a/core/box/ss_legacy_backend_box.c b/core/box/ss_legacy_backend_box.c
index 1667f126..56716e0c 100644
--- a/core/box/ss_legacy_backend_box.c
+++ b/core/box/ss_legacy_backend_box.c
@@ -135,7 +135,25 @@ void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
     }
     if (meta->used < meta->capacity) {
+        // CRITICAL FIX: Validate geometry matches current stride (handles C7 1024->2048 upgrade)
         size_t stride = tiny_block_stride_for_class(class_idx);
+        size_t usable = (slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE : SUPERSLAB_SLAB_USABLE_SIZE;
+        uint16_t expect_cap = (uint16_t)(usable / stride);
+
+        if (meta->capacity != expect_cap) {
+            // Stale geometry detected - reinitialize slab with current stride
+            extern __thread int g_hakmem_lock_depth;
+            g_hakmem_lock_depth++;
+            fprintf(stderr, "[LEGACY_FIX_GEOMETRY] ss=%p slab=%d cls=%d: old_cap=%u -> new_cap=%u (stride=%zu)\n",
+                    (void*)chunk, slab_idx, class_idx,
+                    meta->capacity, expect_cap, stride);
+            g_hakmem_lock_depth--;
+
+            superslab_init_slab(chunk, slab_idx, stride, 0);
+            meta->class_idx = (uint8_t)class_idx;
+            meta = &chunk->slabs[slab_idx];  // Reload after reinit
+        }
+
         size_t offset = (size_t)meta->used * stride;
         uint8_t* base = (uint8_t*)chunk + SUPERSLAB_SLAB0_DATA_OFFSET
diff --git a/core/hakmem_shared_pool.c b/core/hakmem_shared_pool.c
index ee5f3798..09d20c42 100644
--- a/core/hakmem_shared_pool.c
+++ b/core/hakmem_shared_pool.c
@@ -707,6 +707,32 @@ shared_pool_acquire_superslab(void)
 
 // ---------- Layer 4: Public API (High-level) ----------
 
+// Ensure slab geometry matches current class stride (handles upgrades like C7 1024->2048).
+static inline void sp_fix_geometry_if_needed(SuperSlab* ss, int slab_idx, int class_idx)
+{
+    if (!ss || slab_idx < 0 || class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
+        return;
+    }
+    TinySlabMeta* meta = &ss->slabs[slab_idx];
+    size_t stride = g_tiny_class_sizes[class_idx];
+    size_t usable = (slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE : SUPERSLAB_SLAB_USABLE_SIZE;
+    uint16_t expect_cap = (uint16_t)(usable / stride);
+
+    // Reinitialize if capacity is off or class_idx mismatches.
+    if (meta->class_idx != (uint8_t)class_idx || meta->capacity != expect_cap) {
+        extern __thread int g_hakmem_lock_depth;
+        g_hakmem_lock_depth++;
+        fprintf(stderr, "[SP_FIX_GEOMETRY] ss=%p slab=%d cls=%d: old_cls=%u old_cap=%u -> new_cls=%d new_cap=%u (stride=%zu)\n",
+                (void*)ss, slab_idx, class_idx,
+                meta->class_idx, meta->capacity,
+                class_idx, expect_cap, stride);
+        g_hakmem_lock_depth--;
+
+        superslab_init_slab(ss, slab_idx, stride, 0 /*owner_tid*/);
+        meta->class_idx = (uint8_t)class_idx;
+    }
+}
+
 int
 shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out)
 {
@@ -751,6 +777,7 @@ shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out)
             if (slab_meta->class_idx == (uint8_t)class_idx &&
                 slab_meta->capacity > 0 &&
                 slab_meta->used < slab_meta->capacity) {
+                sp_fix_geometry_if_needed(ss, l0_idx, class_idx);
                 if (dbg_acquire == 1) {
                     fprintf(stderr, "[SP_ACQUIRE_STAGE0_L0] class=%d reuse hot slot (ss=%p slab=%d used=%u cap=%u)\n",
@@ -975,6 +1002,7 @@ stage2_fallback:
         *ss_out = ss;
         *slab_idx_out = claimed_idx;
+        sp_fix_geometry_if_needed(ss, claimed_idx, class_idx);
 
         if (g_lock_stats_enabled == 1) {
             atomic_fetch_add(&g_lock_release_count, 1);
@@ -1123,6 +1151,7 @@ stage2_fallback:
         *ss_out = new_ss;
         *slab_idx_out = first_slot;
+        sp_fix_geometry_if_needed(new_ss, first_slot, class_idx);
 
         if (g_lock_stats_enabled == 1) {
             atomic_fetch_add(&g_lock_release_count, 1);
diff --git a/core/hakmem_super_registry.c b/core/hakmem_super_registry.c
index d4ca859c..35a23f13 100644
--- a/core/hakmem_super_registry.c
+++ b/core/hakmem_super_registry.c
@@ -2,6 +2,7 @@
 #include "hakmem_tiny_superslab.h"
 #include
 #include
+#include <sys/mman.h>  // munmap for incompatible SuperSlab eviction
 
 // Global registry storage
 SuperRegEntry g_super_reg[SUPER_REG_SIZE];
@@ -366,12 +367,47 @@ SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
     pthread_mutex_lock(&g_super_reg_lock);
 
-    // Find a matching SuperSlab in cache (same size_class)
+    // Find a compatible SuperSlab in cache (stride must match current config)
     SuperSlab* curr = g_ss_lru_cache.lru_head;
+    extern const size_t g_tiny_class_sizes[];
+    size_t expected_stride = g_tiny_class_sizes[size_class];
+
     while (curr) {
-        // Phase 12: LRU entries are not keyed by ss->size_class; treat any as reusable for now.
-        if (1) {
-            // Found match - remove from cache
+        // Validate: Check if cached SuperSlab slabs match current stride
+        // This prevents reusing old 1024B SuperSlabs for new 2048B C7 allocations
+        int is_compatible = 1;
+
+        // Scan active slabs for stride mismatch
+        int cap = ss_slabs_capacity(curr);
+        for (int i = 0; i < cap; i++) {
+            if (curr->slab_bitmap & (1u << i)) {
+                TinySlabMeta* meta = &curr->slabs[i];
+                if (meta->capacity > 0) {
+                    // Calculate implied stride from slab geometry
+                    // Slab 0: 63488B usable, Others: 65536B usable
+                    size_t slab_usable = (i == 0) ? 63488 : 65536;
+                    size_t implied_stride = slab_usable / meta->capacity;
+
+                    // Stride mismatch detected
+                    if (implied_stride != expected_stride) {
+                        is_compatible = 0;
+#if !HAKMEM_BUILD_RELEASE
+                        static _Atomic uint32_t g_incomp_log = 0;
+                        uint32_t n = atomic_fetch_add(&g_incomp_log, 1);
+                        if (n < 8) {
+                            fprintf(stderr,
+                                    "[LRU_INCOMPATIBLE] class=%d ss=%p slab=%d expect_stride=%zu implied=%zu (evicting)\n",
+                                    size_class, (void*)curr, i, expected_stride, implied_stride);
+                        }
+#endif
+                        break;
+                    }
+                }
+            }
+        }
+
+        if (is_compatible) {
+            // Compatible - reuse this SuperSlab
             ss_lru_remove(curr);
             g_ss_lru_cache.total_count--;
             size_t ss_size = (size_t)1 << curr->lg_size;
@@ -404,7 +440,22 @@ SuperSlab* hak_ss_lru_pop(uint8_t size_class) {
             return curr;
         }
-        curr = curr->lru_next;
+
+        // Incompatible SuperSlab - evict immediately
+        SuperSlab* next = curr->lru_next;
+        ss_lru_remove(curr);
+        g_ss_lru_cache.total_count--;
+        size_t ss_size = (size_t)1 << curr->lg_size;
+        g_ss_lru_cache.total_memory_mb -= (ss_size / (1024 * 1024));
+
+        // Track evictions for observability
+        static _Atomic uint64_t g_incompatible_evictions = 0;
+        atomic_fetch_add(&g_incompatible_evictions, 1);
+
+        // Release memory
+        munmap(curr, ss_size);
+
+        curr = next;
     }
 
     uint32_t cache_count_miss = g_ss_lru_cache.total_count;
diff --git a/core/hakmem_tiny_lazy_init.inc.h b/core/hakmem_tiny_lazy_init.inc.h
index 4858fef6..8e59de5d 100644
--- a/core/hakmem_tiny_lazy_init.inc.h
+++ b/core/hakmem_tiny_lazy_init.inc.h
@@ -15,6 +15,7 @@
 #include
 #include
+#include <stdio.h>  // For fprintf
 #include "superslab/superslab_types.h"  // For SuperSlabACEState
 
 // ============================================================================
@@ -75,6 +76,16 @@ static inline void lazy_init_class(int class_idx) {
         tiny_tls_publish_targets(class_idx, base_cap);
     }
 
+    // CRITICAL FIX: Clear TLS SLL (Phase 3d-B unified structure) to purge stale blocks
+    // This prevents C7 1024B→2048B stride upgrade issues where old misaligned blocks
+    // remain in TLS SLL from previous runs or initialization paths.
+    // Note: g_tls_sll is defined in hakmem_tiny_tls_state_box.inc, already visible here
+    g_tls_sll[class_idx].head = NULL;
+    g_tls_sll[class_idx].count = 0;
+#if !HAKMEM_BUILD_RELEASE
+    fprintf(stderr, "[LAZY_INIT] Cleared TLS SLL for class %d (purge stale blocks)\n", class_idx);
+#endif
+
     // Extract from hak_tiny_init.inc lines 623-625: Per-class lock
     pthread_mutex_init(&g_tiny_class_locks[class_idx].m, NULL);
diff --git a/core/hakmem_tiny_refill_p0.inc.h b/core/hakmem_tiny_refill_p0.inc.h
index 21b5e6f2..440ff422 100644
--- a/core/hakmem_tiny_refill_p0.inc.h
+++ b/core/hakmem_tiny_refill_p0.inc.h
@@ -270,6 +270,32 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
             continue;
         }
 
+        // CRITICAL FIX: Validate geometry before carving to prevent stride mismatch
+        // (e.g., C7 upgrade from 1024B to 2048B stride)
+        // This ensures ALL blocks entering TLS SLL have correct alignment.
+        {
+            size_t expected_stride = tiny_block_stride_for_class(class_idx);
+            size_t usable = (tls->slab_idx == 0) ? SUPERSLAB_SLAB0_USABLE_SIZE
+                                                 : SUPERSLAB_SLAB_USABLE_SIZE;
+            uint16_t expected_cap = (uint16_t)(usable / expected_stride);
+
+            if (meta->capacity != expected_cap) {
+                // Stale geometry detected - FULL RESET to prevent misaligned carve
+                extern __thread int g_hakmem_lock_depth;
+                g_hakmem_lock_depth++;
+                fprintf(stderr,
+                        "[CARVE_GEOMETRY_FIX] cls=%d ss=%p slab=%d: capacity %u→%u (stride=%zu) RESET carved=%u\n",
+                        class_idx, (void*)tls->ss, tls->slab_idx,
+                        meta->capacity, expected_cap, expected_stride, meta->carved);
+                g_hakmem_lock_depth--;
+
+                // Reinitialize with correct stride (resets carved=0, freelist=NULL)
+                superslab_init_slab(tls->ss, tls->slab_idx, expected_stride, 0);
+                meta->class_idx = (uint8_t)class_idx;
+                meta = tls->meta = &tls->ss->slabs[tls->slab_idx];  // Reload after reinit
+            }
+        }
+
         uint32_t available = meta->capacity - meta->carved;
         uint32_t batch = want;
         if (batch > available) batch = available;
diff --git a/core/hakmem_tiny_superslab.h b/core/hakmem_tiny_superslab.h
index 3de1f120..4465bfd8 100644
--- a/core/hakmem_tiny_superslab.h
+++ b/core/hakmem_tiny_superslab.h
@@ -49,7 +49,8 @@ static inline uint64_t hak_now_ns(void) {
 // byte per block for the header. Class 7 (1024B) remains headerless by design.
 static inline size_t tiny_block_stride_for_class(int class_idx) {
     // Local size table (avoid extern dependency for inline function)
-    static const size_t class_sizes[8] = {8, 16, 32, 64, 128, 256, 512, 1024};
+    // CRITICAL: C7 upgraded from 1024B to 2048B stride (Phase C7-Upgrade)
+    static const size_t class_sizes[8] = {8, 16, 32, 64, 128, 256, 512, 2048};
     size_t bs = class_sizes[class_idx];
 #if HAKMEM_TINY_HEADER_CLASSIDX
     // Phase E1-CORRECT: ALL classes have 1-byte header
diff --git a/core/tiny_nextptr.h b/core/tiny_nextptr.h
index d69214fb..f2046eff 100644
--- a/core/tiny_nextptr.h
+++ b/core/tiny_nextptr.h
@@ -96,8 +96,8 @@ static inline __attribute__((always_inline)) void tiny_next_store(void* base, in
     // Misalignment detector: class stride vs base offset
     do {
         static _Atomic uint32_t g_next_misalign_log = 0;
-        extern const size_t g_tiny_class_sizes[];
-        size_t stride = (class_idx >= 0 && class_idx < 8) ? g_tiny_class_sizes[class_idx] : 0;
+        extern size_t tiny_block_stride_for_class(int class_idx);  // Includes header if enabled
+        size_t stride = (class_idx >= 0 && class_idx < 8) ? tiny_block_stride_for_class(class_idx) : 0;
         if (stride > 0) {
            uintptr_t delta = ((uintptr_t)base) % stride;
            if (__builtin_expect(delta != 0, 0)) {
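
Reviewer note (illustrative, not part of the patch): every fix above relies on the same geometry arithmetic. A slab's expected capacity is its usable bytes divided by the current stride, and a cached slab's implied stride is its usable bytes divided by its recorded capacity, so a slab carved under the old C7 stride (1024B) stops matching once the class table reports 2048B. The standalone sketch below replays that arithmetic with the constants quoted in the diff (63488B usable for slab 0, 65536B otherwise); SLAB0_USABLE, SLAB_USABLE, and expected_capacity() are stand-in names for this sketch, not identifiers from the tree.

    /* Illustrative sketch only - not part of the patch. Shows why a slab carved
     * with the old C7 stride (1024B) is flagged stale once the stride is 2048B.
     * SLAB0_USABLE / SLAB_USABLE stand in for SUPERSLAB_SLAB0_USABLE_SIZE /
     * SUPERSLAB_SLAB_USABLE_SIZE, per the LRU validation comment above. */
    #include <stdint.h>
    #include <stdio.h>

    #define SLAB0_USABLE 63488u   /* slab 0 loses space to the SuperSlab header */
    #define SLAB_USABLE  65536u   /* all other slabs */

    static uint16_t expected_capacity(int slab_idx, size_t stride) {
        size_t usable = (slab_idx == 0) ? SLAB0_USABLE : SLAB_USABLE;
        return (uint16_t)(usable / stride);
    }

    int main(void) {
        const size_t old_stride = 1024, new_stride = 2048;  /* C7 before/after upgrade */
        for (int slab_idx = 0; slab_idx < 2; slab_idx++) {
            uint16_t old_cap = expected_capacity(slab_idx, old_stride);
            uint16_t new_cap = expected_capacity(slab_idx, new_stride);
            size_t usable  = (slab_idx == 0) ? SLAB0_USABLE : SLAB_USABLE;
            size_t implied = usable / old_cap;  /* stride recovered from a stale slab */
            /* A slab initialized before the upgrade still reports old_cap, so
             * comparing it against new_cap flags the stale geometry. */
            printf("slab=%d old_cap=%u new_cap=%u implied_stride=%zu stale=%s\n",
                   slab_idx, (unsigned)old_cap, (unsigned)new_cap, implied,
                   (old_cap != new_cap) ? "yes" : "no");
        }
        return 0;
    }

Compiled on its own, the sketch reports old_cap != new_cap for both slab shapes, which is exactly the condition sp_fix_geometry_if_needed() and the carve-path check use to trigger superslab_init_slab(), and the implied-stride mismatch the LRU pop uses to evict incompatible SuperSlabs.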