Fix ptr_user_to_base_blind regression: use class-aware base calculation and correct slab index lookup

Moe Charm (CI)
2025-12-03 12:29:31 +09:00
parent c2716f5c01
commit c91602f181
4 changed files with 65 additions and 55 deletions
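The core of the fix, sketched for orientation (not part of the diff): the old path computed base "blindly" as ptr-1 for every size class, while the new path goes through a class-aware conversion. The helper names and the per-class header-size table below are hypothetical; only the shape of the logic is taken from the patch.

    #include <stddef.h>

    // Hypothetical per-class header sizes (the patch hints that C0 differs from C1-C7).
    static const size_t k_header_size[8] = { 0, 1, 1, 1, 1, 1, 1, 1 };

    // Old (blind): assumes every class carries a 1-byte header before the user pointer.
    static inline void* user_to_base_blind(void* user) {
        return (char*)user - 1;                       // wrong whenever the header size is not 1
    }

    // New (class-aware): subtract the header size recorded for this class.
    static inline void* user_to_base_by_class(void* user, int class_idx) {
        return (char*)user - k_header_size[class_idx];
    }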


@@ -29,6 +29,7 @@
#include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check)
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
#include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing)
#include "box/ptr_conversion_box.h" // Phase 10: Correct pointer arithmetic
// Phase 7: Header-based ultra-fast free
#if HAKMEM_TINY_HEADER_CLASSIDX
@@ -48,7 +49,7 @@ static inline uint32_t tiny_self_u32_local(void) {
}
#endif
// ========== Ultra-Fast Free (Header-based) ==========
// Ultra-fast free for header-based allocations
// Returns: 1 if handled, 0 if needs slow path
@@ -57,11 +58,11 @@ static inline uint32_t tiny_self_u32_local(void) {
// vs Current: 330+ lines, 500+ cycles (100x faster!)
//
// Assembly (x86-64, release build):
// movzbl -0x1(%rdi),%eax # Read header (class_idx)
// mov g_tls_sll_head(,%rax,8),%rdx # Load head
// mov %rdx,(%rdi) # ptr->next = head
// mov %rdi,g_tls_sll_head(,%rax,8) # head = ptr
// addl $0x1,g_tls_sll_count(,%rax,4) # count++
// movzbl -0x1(%rdi),%eax // Read header (class_idx)
// mov g_tls_sll_head(,%rax,8),%rdx // Load head
// mov %rdx,(%rdi) // ptr->next = head
// mov %rdi,g_tls_sll_head(,%rax,8) // head = ptr
// addl $0x1,g_tls_sll_count(,%rax,4) // count++
// ret
//
// Expected: 3-5 instructions, 5-10 cycles (L1 hit)
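For reference, a C-level sketch of what those five instructions implement (the TLS array names come from the annotation above; the exact declarations and types are assumptions):

    #include <stdint.h>

    extern __thread void*    g_tls_sll_head[8];    // per-class singly-linked freelist heads
    extern __thread uint32_t g_tls_sll_count[8];   // per-class freelist lengths

    static inline void tls_sll_push_sketch(void* ptr) {
        uint8_t class_idx = *((uint8_t*)ptr - 1);   // movzbl -0x1(%rdi),%eax
        *(void**)ptr = g_tls_sll_head[class_idx];   // ptr->next = head
        g_tls_sll_head[class_idx] = ptr;            // head = ptr
        g_tls_sll_count[class_idx]++;               // count++
    }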
@@ -79,7 +80,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Expected: 9M → 30-50M ops/s recovery (+226-443%)
// CRITICAL: Check if header is accessible before reading
void* header_addr = (char*)ptr - 1;
// FIX: Validate ptr itself rather than ptr-1 where possible, or trust the lookup below
// void* header_addr = (char*)ptr - 1; // <-- Dangerous for C0
#if !HAKMEM_BUILD_RELEASE
// Debug: Validate header accessibility (metadata-based check)
@@ -87,7 +89,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Strategy: Trust internal metadata (registry ensures memory is valid)
// Benefit: Catch invalid pointers via header magic validation below
extern int hak_is_memory_readable(void* addr);
if (!hak_is_memory_readable(header_addr)) {
if (!hak_is_memory_readable(ptr)) { // Check ptr, not header_addr
return 0; // Header not accessible - not a Tiny allocation
}
#else
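(Aside, not part of the diff: hak_is_memory_readable() is only declared extern here. One common way to implement such a probe — shown purely as an illustration, not as the project's actual code — is the pipe trick, since write() reports EFAULT instead of faulting when the source byte is unreadable.)

    #include <unistd.h>

    // Illustrative probe: returns 1 if the byte at addr can be read, 0 otherwise.
    // A real implementation would cache the pipe fds instead of creating them per call.
    static int probe_readable(void* addr) {
        int fds[2];
        if (pipe(fds) != 0) return 0;
        int ok = (write(fds[1], addr, 1) == 1);   // fails with EFAULT if addr is unreadable
        close(fds[0]);
        close(fds[1]);
        return ok;
    }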
@@ -118,9 +120,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
if (__builtin_expect(g_use_class_map, 1)) {
// P1.2: class_map path - avoid Header read
SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1);
// FIX: Use ptr (USER) for lookup, NOT ptr-1
SuperSlab* ss = ss_fast_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
int slab_idx = slab_index_for(ss, (uint8_t*)ptr - 1);
// FIX: Use ptr (USER) for slab index
int slab_idx = slab_index_for(ss, ptr);
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
int map_class = tiny_get_class_from_ss(ss, slab_idx);
if (map_class < TINY_NUM_CLASSES) {
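Why looking up ptr-1 misfires, as a worked example. This assumes slab_index_for() is essentially (addr - slab_area_base) / SLAB_SIZE; the 64 KiB slab size is illustrative, not taken from the source.

    // Suppose the freed block is the very first block of slab 3 and its user pointer
    // sits exactly at the slab start (possible when this class keeps no header):
    //
    //   ptr     = slab_area_base + 3*65536      ->  (3*65536)     / 65536 = 3   (correct slab)
    //   ptr - 1 = slab_area_base + 3*65536 - 1  ->  (3*65536 - 1) / 65536 = 2   (previous slab!)
    //
    // With the wrong slab index, tiny_get_class_from_ss() reports the neighbouring
    // slab's class, and every later class-dependent step (base math, freelist push)
    // operates on the wrong class.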
@@ -161,9 +165,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Cross-check header class vs meta class (if available from fast lookup)
do {
// Try fast owner slab lookup to get meta->class_idx for comparison
SuperSlab* ss = hak_super_lookup((uint8_t*)ptr - 1);
// FIX: Use ptr (USER)
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
int sidx = slab_index_for(ss, (uint8_t*)ptr - 1);
// FIX: Use ptr (USER)
int sidx = slab_index_for(ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
uint8_t meta_cls = m->class_idx;
@@ -217,7 +223,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// 3. Push base to TLS freelist (4 instructions, 5-7 cycles)
// Must push base (block start), not the user pointer!
// Phase E1 assumed a 1-byte header for ALL classes (C0-C7), i.e. base = ptr-1;
// Phase 10 replaces that with a class-aware conversion (ptr_user_to_base below)
void* base = (char*)ptr - 1;
// FIX: Use ptr_user_to_base(ptr, class_idx) logic
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
// Phase 14-C: UltraHot does not intercept blocks on free (Borrowing design)
// → keeps the canonical (TLS SLL) inventory correct
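The HAK_USER_FROM_RAW / HAK_BASE_TO_RAW wrappers suggest ptr_conversion_box.h keeps "user" and "base" pointers as distinct types so they cannot be mixed up silently. A minimal sketch of that idea; the struct layouts, macro bodies, and header-size table are assumptions, not the project's actual definitions:

    typedef struct { void* p; } hak_user_ptr_t;   // points at user data (after any header)
    typedef struct { void* p; } hak_base_ptr_t;   // points at the block start

    #define HAK_USER_FROM_RAW(raw)   ((hak_user_ptr_t){ (raw) })
    #define HAK_BASE_TO_RAW(base)    ((base).p)

    // Class-aware conversion; k_header_size is a hypothetical per-class table.
    static inline hak_base_ptr_t ptr_user_to_base(hak_user_ptr_t u, int class_idx) {
        extern const unsigned char k_header_size[8];
        return (hak_base_ptr_t){ (char*)u.p - k_header_size[class_idx] };
    }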
@@ -237,6 +244,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
SuperSlab* ss = ss_fast_lookup(base);
if (__builtin_expect(ss != NULL, 1)) {
// FIX: slab_index_for on BASE (since base is correct now)
int slab_idx = slab_index_for(ss, base);
if (__builtin_expect(slab_idx >= 0, 1)) {
uint32_t self_tid = tiny_self_u32_local();
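The self_tid fetched here feeds the cross-thread check mentioned in the includes ("cross-thread routing"). A sketch of the decision it presumably guards; the owner_tid field name is an assumption, only TinySlabMeta, tiny_self_u32_local() and tiny_free_remote_box appear in this file:

    // Hypothetical shape of the ownership test that decides local vs remote free.
    static inline int is_cross_thread_free_sketch(const TinySlabMeta* meta, uint32_t self_tid) {
        return meta->owner_tid != self_tid;   // owner_tid: assumed per-slab owner thread id
    }
    // If it returns non-zero, the block is handed to tiny_free_remote_box() instead of
    // being pushed onto this thread's TLS SLL.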
@@ -351,7 +359,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
return 1; // Success - handled in fast path
}
// ========== Free Entry Point ==========
// Entry point for free() - tries fast path first, falls back to slow path
//
@@ -373,7 +381,7 @@ static inline void hak_free_fast_v2_entry(void* ptr) {
hak_tiny_free(ptr);
}
// ========== Performance Counters (Debug) ==========
#if !HAKMEM_BUILD_RELEASE
// Performance counters (TLS, lightweight)
@@ -405,7 +413,7 @@ static inline void hak_free_v2_track_fast(void) {}
static inline void hak_free_v2_track_slow(void) {}
#endif
// ========== Benchmark Comparison ==========
//
// Current (hak_tiny_free_superslab):
// - 2x SuperSlab lookup: 200+ cycles
@@ -425,4 +433,4 @@ static inline void hak_free_v2_track_slow(void) {}
// - HAKMEM: 5-10 cycles (3-5 instructions)
// - Result: 70-110% of System speed (on par with or beating it!)
#endif // HAKMEM_TINY_HEADER_CLASSIDX