Fix ptr_user_to_base_blind regression: use class-aware base calculation and correct slab index lookup
This commit is contained in:
@ -29,6 +29,7 @@
|
||||
#include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check)
|
||||
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
|
||||
#include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing)
|
||||
#include "box/ptr_conversion_box.h" // Phase 10: Correct pointer arithmetic
|
||||
|
||||
// Phase 7: Header-based ultra-fast free
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
@ -48,7 +49,7 @@ static inline uint32_t tiny_self_u32_local(void) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// ========== Ultra-Fast Free (Header-based) ==========
|
||||
// ========== Ultra-Fast Free (Header-based) ==========
|
||||
|
||||
// Ultra-fast free for header-based allocations
|
||||
// Returns: 1 if handled, 0 if needs slow path
|
||||
@ -57,11 +58,11 @@ static inline uint32_t tiny_self_u32_local(void) {
|
||||
// vs Current: 330+ lines, 500+ cycles (100x faster!)
|
||||
//
|
||||
// Assembly (x86-64, release build):
|
||||
// movzbl -0x1(%rdi),%eax # Read header (class_idx)
|
||||
// mov g_tls_sll_head(,%rax,8),%rdx # Load head
|
||||
// mov %rdx,(%rdi) # ptr->next = head
|
||||
// mov %rdi,g_tls_sll_head(,%rax,8) # head = ptr
|
||||
// addl $0x1,g_tls_sll_count(,%rax,4) # count++
|
||||
// movzbl -0x1(%rdi),%eax // Read header (class_idx)
|
||||
// mov g_tls_sll_head(,%rax,8),%rdx // Load head
|
||||
// mov %rdx,(%rdi) // ptr->next = head
|
||||
// mov %rdi,g_tls_sll_head(,%rax,8) // head = ptr
|
||||
// addl $0x1,g_tls_sll_count(,%rax,4) // count++
|
||||
// ret
|
||||
//
|
||||
// Expected: 3-5 instructions, 5-10 cycles (L1 hit)
|
||||
@ -79,7 +80,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Expected: 9M → 30-50M ops/s recovery (+226-443%)
|
||||
|
||||
// CRITICAL: Check if header is accessible before reading
|
||||
void* header_addr = (char*)ptr - 1;
|
||||
// FIX: Use ptr directly, not ptr-1, for validation if possible, or trust lookup
|
||||
// void* header_addr = (char*)ptr - 1; // <-- Dangerous for C0
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Debug: Validate header accessibility (metadata-based check)
|
||||
@ -87,7 +89,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Strategy: Trust internal metadata (registry ensures memory is valid)
|
||||
// Benefit: Catch invalid pointers via header magic validation below
|
||||
extern int hak_is_memory_readable(void* addr);
|
||||
if (!hak_is_memory_readable(header_addr)) {
|
||||
if (!hak_is_memory_readable(ptr)) { // Check ptr, not header_addr
|
||||
return 0; // Header not accessible - not a Tiny allocation
|
||||
}
|
||||
#else
|
||||
@ -118,9 +120,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
|
||||
if (__builtin_expect(g_use_class_map, 1)) {
|
||||
// P1.2: class_map path - avoid Header read
|
||||
SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1);
|
||||
// FIX: Use ptr (USER) for lookup, NOT ptr-1
|
||||
SuperSlab* ss = ss_fast_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int slab_idx = slab_index_for(ss, (uint8_t*)ptr - 1);
|
||||
// FIX: Use ptr (USER) for slab index
|
||||
int slab_idx = slab_index_for(ss, ptr);
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
int map_class = tiny_get_class_from_ss(ss, slab_idx);
|
||||
if (map_class < TINY_NUM_CLASSES) {
|
||||
@ -161,9 +165,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Cross-check header class vs meta class (if available from fast lookup)
|
||||
do {
|
||||
// Try fast owner slab lookup to get meta->class_idx for comparison
|
||||
SuperSlab* ss = hak_super_lookup((uint8_t*)ptr - 1);
|
||||
// FIX: Use ptr (USER)
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
int sidx = slab_index_for(ss, (uint8_t*)ptr - 1);
|
||||
// FIX: Use ptr (USER)
|
||||
int sidx = slab_index_for(ss, ptr);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
uint8_t meta_cls = m->class_idx;
|
||||
@ -217,7 +223,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// 3. Push base to TLS freelist (4 instructions, 5-7 cycles)
|
||||
// Must push base (block start) not user pointer!
|
||||
// Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1
|
||||
void* base = (char*)ptr - 1;
|
||||
// FIX: Use ptr_user_to_base(ptr, class_idx) logic
|
||||
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||
|
||||
// Phase 14-C: UltraHot does not intercept blocks at free time (Borrowing design)
|
||||
// → keeps the inventory of the canonical store (TLS SLL) accurate
|
||||
@ -237,6 +244,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
||||
SuperSlab* ss = ss_fast_lookup(base);
|
||||
if (__builtin_expect(ss != NULL, 1)) {
|
||||
// FIX: slab_index_for on BASE (since base is correct now)
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
if (__builtin_expect(slab_idx >= 0, 1)) {
|
||||
uint32_t self_tid = tiny_self_u32_local();
|
||||
@ -351,7 +359,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
||||
return 1; // Success - handled in fast path
|
||||
}
|
||||
|
||||
// ========== Free Entry Point ==========
|
||||
// ========== Free Entry Point ==========
|
||||
|
||||
// Entry point for free() - tries fast path first, falls back to slow path
|
||||
//
|
||||
@ -373,7 +381,7 @@ static inline void hak_free_fast_v2_entry(void* ptr) {
|
||||
hak_tiny_free(ptr);
|
||||
}
|
||||
|
||||
// ========== Performance Counters (Debug) ==========
|
||||
// ========== Performance Counters (Debug) ==========
|
||||
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
// Performance counters (TLS, lightweight)
|
||||
@ -405,7 +413,7 @@ static inline void hak_free_v2_track_fast(void) {}
|
||||
static inline void hak_free_v2_track_slow(void) {}
|
||||
#endif
|
||||
|
||||
// ========== Benchmark Comparison ==========
|
||||
// ========== Benchmark Comparison ==========
|
||||
//
|
||||
// Current (hak_tiny_free_superslab):
|
||||
// - 2x SuperSlab lookup: 200+ cycles
|
||||
@ -425,4 +433,4 @@ static inline void hak_free_v2_track_slow(void) {}
|
||||
// - HAKMEM: 5-10 cycles (3-5 instructions)
|
||||
// - Result: 70-110% of System speed (on par to winning!)
|
||||
|
||||
#endif // HAKMEM_TINY_HEADER_CLASSIDX
|
||||
#endif // HAKMEM_TINY_HEADER_CLASSIDX
|
||||
Reference in New Issue
Block a user