diff --git a/Makefile b/Makefile index 4b91bc7e..640bf7dc 100644 --- a/Makefile +++ b/Makefile @@ -251,7 +251,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -428,7 +428,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o diff --git a/core/hakmem_tiny_free.inc b/core/hakmem_tiny_free.inc index 12a00102..1a5241aa 100644 --- a/core/hakmem_tiny_free.inc +++ b/core/hakmem_tiny_free.inc @@ -125,8 +125,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { if (!ss || ss->magic != SUPERSLAB_MAGIC) return; // Derive class_idx from per-slab metadata instead of ss->size_class int class_idx = -1; - void* base = ptr_user_to_base_blind(ptr); - int slab_idx = slab_index_for(ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr (USER) directly + int slab_idx = slab_index_for(ss, ptr); // FIX: slab_index_for works better with ptr (USER) for C0/C7 if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) { TinySlabMeta* meta_probe = &ss->slabs[slab_idx]; if (meta_probe->class_idx < TINY_NUM_CLASSES) { @@ -155,9 +155,9 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { } tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0); // Detect cross-thread: cross-thread free MUST go via superslab path - // ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation - base = ptr_user_to_base_blind(ptr); - slab_idx = slab_index_for(ss, base); + // FIX: Use ptr (USER) for slab index calculation to handle C0/C7 boundary correctly + // base = ptr_user_to_base_blind(ptr); + slab_idx = slab_index_for(ss, ptr); int ss_cap = ss_slabs_capacity(ss); if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) { tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx); @@ -167,8 +167,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { if (__builtin_expect(g_tiny_safe_free, 0)) { size_t blk = g_tiny_class_sizes[class_idx]; uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx); - // Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr - uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base; + // Phase E1-CORRECT: All classes have headers, validate block base using known class_idx + uintptr_t delta = (uintptr_t)HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)) - (uintptr_t)slab_base; int cap_ok = (meta->capacity > 0) ? 1 : 0; int align_ok = (delta % blk) == 0; int range_ok = cap_ok && (delta / blk) < meta->capacity; @@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { if (__builtin_expect(g_debug_fast0, 0)) { tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx); // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); void* prev = meta->freelist; tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers meta->freelist = base; @@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { // Front-V2: try to return to TLS magazine first (A/B, default OFF) // Phase 7-Step8: Use config macro for dead code elimination in PGO mode if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) { - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); if (tiny_heap_v2_try_push(class_idx, base)) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx); HAK_STAT_FREE(class_idx); @@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { if (g_fast_enable && g_fast_cap[class_idx] != 0) { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); int pushed = 0; // Phase 7-Step5: Use config macro for dead code elimination in PGO mode if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) { @@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { // TinyHotMag front push(8/16/32B, A/B) if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); if (hotmag_push(class_idx, base)) { tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1); HAK_STAT_FREE(class_idx); @@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { } if (tls->count < tls->cap) { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); tiny_tls_list_guard_push(class_idx, tls, base); tls_list_push(tls, base, class_idx); tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0); @@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { } { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = ptr_user_to_base_blind(ptr); + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); tiny_tls_list_guard_push(class_idx, tls, base); tls_list_push(tls, base, class_idx); } @@ -307,9 +307,9 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) { } #include "tiny_free_magazine.inc.h" -// ============================================================================ +// ============================================================================ // Phase 6.23: SuperSlab Allocation Helpers -// ============================================================================ +// ============================================================================ // Phase 6.24: Allocate from SuperSlab slab (lazy freelist + linear allocation) #include "tiny_superslab_alloc.inc.h" @@ -332,8 +332,8 @@ void hak_tiny_free(void* ptr) { // Resolve class_idx from per-slab metadata instead of ss->size_class SuperSlab* ss = hak_super_lookup(ptr); if (ss && ss->magic == SUPERSLAB_MAGIC) { - void* base = ptr_user_to_base_blind(ptr); - int sidx = slab_index_for(ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr + int sidx = slab_index_for(ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { TinySlabMeta* m = &ss->slabs[sidx]; if (m->class_idx < TINY_NUM_CLASSES) { @@ -392,8 +392,8 @@ void hak_tiny_free(void* ptr) { // Resolve class_idx from per-slab metadata instead of ss->size_class SuperSlab* ss = hak_super_lookup(ptr); if (ss && ss->magic == SUPERSLAB_MAGIC) { - void* base = ptr_user_to_base_blind(ptr); - int sidx = slab_index_for(ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr + int sidx = slab_index_for(ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { TinySlabMeta* m = &ss->slabs[sidx]; if (m->class_idx < TINY_NUM_CLASSES) { @@ -470,8 +470,8 @@ void hak_tiny_free(void* ptr) { if (g_use_superslab) { fast_ss = hak_super_lookup(ptr); if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) { - void* base = ptr_user_to_base_blind(ptr); - int sidx = slab_index_for(fast_ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr + int sidx = slab_index_for(fast_ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) { TinySlabMeta* m = &fast_ss->slabs[sidx]; if (m->class_idx < TINY_NUM_CLASSES) { @@ -494,8 +494,8 @@ void hak_tiny_free(void* ptr) { int ss_cls = -1, ts_cls = -1; SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL); if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) { - void* base = ptr_user_to_base_blind(ptr); - int sidx = slab_index_for(chk_ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr + int sidx = slab_index_for(chk_ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) { TinySlabMeta* m = &chk_ss->slabs[sidx]; if (m->class_idx < TINY_NUM_CLASSES) { @@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) { } if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base2 = ptr_user_to_base_blind(ptr); + void* base2 = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), fast_class_idx)); // PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct) int pushed = 0; // Phase 7-Step5: Use config macro for dead code elimination in PGO mode @@ -543,8 +543,8 @@ void hak_tiny_free(void* ptr) { if (ss && ss->magic == SUPERSLAB_MAGIC) { // Derive class from per-slab meta int cls = -1; - void* base = ptr_user_to_base_blind(ptr); - int sidx = slab_index_for(ss, base); + // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr + int sidx = slab_index_for(ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { TinySlabMeta* m = &ss->slabs[sidx]; if (m->class_idx < TINY_NUM_CLASSES) { @@ -574,9 +574,9 @@ void hak_tiny_free(void* ptr) { hak_tiny_free_with_slab(ptr, slab); } -// ============================================================================ +// ============================================================================ // EXTRACTED TO hakmem_tiny_query.c (Phase 2B-1) -// ============================================================================ +// ============================================================================ // EXTRACTED: int hak_tiny_is_managed(void* ptr) { // EXTRACTED: if (!ptr || !g_tiny_initialized) return 0; // EXTRACTED: // Phase 6.12.1: O(1) slab lookup via registry/list @@ -631,9 +631,9 @@ void hak_tiny_free(void* ptr) { // EXTRACTED: } -// ============================================================================ +// ============================================================================ // Statistics and Debug Functions - Extracted to hakmem_tiny_stats.c -// ============================================================================ +// ============================================================================ // (Phase 2B API headers moved to top of file) @@ -672,4 +672,4 @@ void hak_tiny_shutdown(void) { -// Always-available: Trim empty slabs (release fully-free slabs) +// Always-available: Trim empty slabs (release fully-free slabs) \ No newline at end of file diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h index c8ff7a54..80d6ba1e 100644 --- a/core/tiny_free_fast_v2.inc.h +++ b/core/tiny_free_fast_v2.inc.h @@ -29,6 +29,7 @@ #include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check) #include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary #include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing) +#include "box/ptr_conversion_box.h" // Phase 10: Correct pointer arithmetic // Phase 7: Header-based ultra-fast free #if HAKMEM_TINY_HEADER_CLASSIDX @@ -48,7 +49,7 @@ static inline uint32_t tiny_self_u32_local(void) { } #endif -// ========== Ultra-Fast Free (Header-based) ========== +// ========== Ultra-Fast Free (Header-based) ========== // Ultra-fast free for header-based allocations // Returns: 1 if handled, 0 if needs slow path @@ -57,11 +58,11 @@ static inline uint32_t tiny_self_u32_local(void) { // vs Current: 330+ lines, 500+ cycles (100x faster!) // // Assembly (x86-64, release build): -// movzbl -0x1(%rdi),%eax # Read header (class_idx) -// mov g_tls_sll_head(,%rax,8),%rdx # Load head -// mov %rdx,(%rdi) # ptr->next = head -// mov %rdi,g_tls_sll_head(,%rax,8) # head = ptr -// addl $0x1,g_tls_sll_count(,%rax,4) # count++ +// movzbl -0x1(%rdi),%eax // Read header (class_idx) +// mov g_tls_sll_head(,%rax,8),%rdx // Load head +// mov %rdx,(%rdi) // ptr->next = head +// mov %rdi,g_tls_sll_head(,%rax,8) // head = ptr +// addl $0x1,g_tls_sll_count(,%rax,4) // count++ // ret // // Expected: 3-5 instructions, 5-10 cycles (L1 hit) @@ -79,7 +80,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // Expected: 9M → 30-50M ops/s recovery (+226-443%) // CRITICAL: Check if header is accessible before reading - void* header_addr = (char*)ptr - 1; + // FIX: Use ptr directly, not ptr-1, for validation if possible, or trust lookup + // void* header_addr = (char*)ptr - 1; // <-- Dangerous for C0 #if !HAKMEM_BUILD_RELEASE // Debug: Validate header accessibility (metadata-based check) @@ -87,7 +89,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // Strategy: Trust internal metadata (registry ensures memory is valid) // Benefit: Catch invalid pointers via header magic validation below extern int hak_is_memory_readable(void* addr); - if (!hak_is_memory_readable(header_addr)) { + if (!hak_is_memory_readable(ptr)) { // Check ptr, not header_addr return 0; // Header not accessible - not a Tiny allocation } #else @@ -118,9 +120,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { if (__builtin_expect(g_use_class_map, 1)) { // P1.2: class_map path - avoid Header read - SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1); + // FIX: Use ptr (USER) for lookup, NOT ptr-1 + SuperSlab* ss = ss_fast_lookup(ptr); if (ss && ss->magic == SUPERSLAB_MAGIC) { - int slab_idx = slab_index_for(ss, (uint8_t*)ptr - 1); + // FIX: Use ptr (USER) for slab index + int slab_idx = slab_index_for(ss, ptr); if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) { int map_class = tiny_get_class_from_ss(ss, slab_idx); if (map_class < TINY_NUM_CLASSES) { @@ -161,9 +165,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // Cross-check header class vs meta class (if available from fast lookup) do { // Try fast owner slab lookup to get meta->class_idx for comparison - SuperSlab* ss = hak_super_lookup((uint8_t*)ptr - 1); + // FIX: Use ptr (USER) + SuperSlab* ss = hak_super_lookup(ptr); if (ss && ss->magic == SUPERSLAB_MAGIC) { - int sidx = slab_index_for(ss, (uint8_t*)ptr - 1); + // FIX: Use ptr (USER) + int sidx = slab_index_for(ss, ptr); if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { TinySlabMeta* m = &ss->slabs[sidx]; uint8_t meta_cls = m->class_idx; @@ -217,7 +223,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // 3. Push base to TLS freelist (4 instructions, 5-7 cycles) // Must push base (block start) not user pointer! // Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1 - void* base = (char*)ptr - 1; + // FIX: Use ptr_user_to_base(ptr, class_idx) logic + void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)); // Phase 14-C: UltraHot は free 時に横取りしない(Borrowing 設計) // → 正史(TLS SLL)の在庫を正しく保つ @@ -237,6 +244,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { // Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100) SuperSlab* ss = ss_fast_lookup(base); if (__builtin_expect(ss != NULL, 1)) { + // FIX: slab_index_for on BASE (since base is correct now) int slab_idx = slab_index_for(ss, base); if (__builtin_expect(slab_idx >= 0, 1)) { uint32_t self_tid = tiny_self_u32_local(); @@ -351,7 +359,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { return 1; // Success - handled in fast path } -// ========== Free Entry Point ========== +// ========== Free Entry Point ========== // Entry point for free() - tries fast path first, falls back to slow path // @@ -373,7 +381,7 @@ static inline void hak_free_fast_v2_entry(void* ptr) { hak_tiny_free(ptr); } -// ========== Performance Counters (Debug) ========== +// ========== Performance Counters (Debug) ========== #if !HAKMEM_BUILD_RELEASE // Performance counters (TLS, lightweight) @@ -405,7 +413,7 @@ static inline void hak_free_v2_track_fast(void) {} static inline void hak_free_v2_track_slow(void) {} #endif -// ========== Benchmark Comparison ========== +// ========== Benchmark Comparison ========== // // Current (hak_tiny_free_superslab): // - 2x SuperSlab lookup: 200+ cycles @@ -425,4 +433,4 @@ static inline void hak_free_v2_track_slow(void) {} // - HAKMEM: 5-10 cycles (3-5 instructions) // - Result: 70-110% of System speed (互角〜勝ち!) -#endif // HAKMEM_TINY_HEADER_CLASSIDX +#endif // HAKMEM_TINY_HEADER_CLASSIDX \ No newline at end of file diff --git a/core/tiny_free_magazine.inc.h b/core/tiny_free_magazine.inc.h index 9aca1bef..c9e506cc 100644 --- a/core/tiny_free_magazine.inc.h +++ b/core/tiny_free_magazine.inc.h @@ -127,6 +127,7 @@ if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) { // Direct freelist push (same as old hak_tiny_free_superslab) // Phase 10: it.ptr is BASE. + // FIX: it.ptr is BASE, use it directly (do not subtract 1) void* base = it.ptr; int slab_idx = slab_index_for(owner_ss, base); // BUGFIX: Validate slab_idx before array access (prevents OOB) @@ -320,8 +321,8 @@ SuperSlab* ss_owner = hak_super_lookup(it.ptr); if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) { // SuperSlab spill - return to freelist - // ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation - void* base = (void*)((uint8_t*)it.ptr - 1); + // FIX: it.ptr is BASE, use directly + void* base = it.ptr; int slab_idx = slab_index_for(ss_owner, base); // BUGFIX: Validate slab_idx before array access (prevents OOB) if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) { @@ -430,7 +431,7 @@ mag->top++; } } - } else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT + } else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base // Phase 10: Use hak_base_ptr_t hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr)); mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr); @@ -467,7 +468,7 @@ mag->top++; } } - } else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT + } else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base // Phase 10: Use hak_base_ptr_t hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr)); mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr); @@ -497,7 +498,8 @@ return; } else if (slab) { // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header - void* base = (void*)((uint8_t*)ptr - 1); + // FIX: Use ptr_user_to_base to get correct base + void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))); tiny_remote_push(slab, base); } } \ No newline at end of file