Fix ptr_user_to_base_blind regression: use class-aware base calculation and correct slab index lookup
This commit is contained in:
4
Makefile
4
Makefile
@ -251,7 +251,7 @@ endif
|
|||||||
# Benchmark targets
|
# Benchmark targets
|
||||||
BENCH_HAKMEM = bench_allocators_hakmem
|
BENCH_HAKMEM = bench_allocators_hakmem
|
||||||
BENCH_SYSTEM = bench_allocators_system
|
BENCH_SYSTEM = bench_allocators_system
|
||||||
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
|
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
|
||||||
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
|
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
|
||||||
ifeq ($(POOL_TLS_PHASE1),1)
|
ifeq ($(POOL_TLS_PHASE1),1)
|
||||||
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
||||||
@ -428,7 +428,7 @@ test-box-refactor: box-refactor
|
|||||||
./larson_hakmem 10 8 128 1024 1 12345 4
|
./larson_hakmem 10 8 128 1024 1 12345 4
|
||||||
|
|
||||||
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
|
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
|
||||||
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
|
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
|
||||||
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
|
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
|
||||||
ifeq ($(POOL_TLS_PHASE1),1)
|
ifeq ($(POOL_TLS_PHASE1),1)
|
||||||
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
|
||||||
|
|||||||
@ -125,8 +125,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
|
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
|
||||||
// Derive class_idx from per-slab metadata instead of ss->size_class
|
// Derive class_idx from per-slab metadata instead of ss->size_class
|
||||||
int class_idx = -1;
|
int class_idx = -1;
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr (USER) directly
|
||||||
int slab_idx = slab_index_for(ss, base);
|
int slab_idx = slab_index_for(ss, ptr); // FIX: slab_index_for works better with ptr (USER) for C0/C7
|
||||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||||
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
|
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
|
||||||
if (meta_probe->class_idx < TINY_NUM_CLASSES) {
|
if (meta_probe->class_idx < TINY_NUM_CLASSES) {
|
||||||
@ -155,9 +155,9 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
}
|
}
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
|
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
|
||||||
// Detect cross-thread: cross-thread free MUST go via superslab path
|
// Detect cross-thread: cross-thread free MUST go via superslab path
|
||||||
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
|
// FIX: Use ptr (USER) for slab index calculation to handle C0/C7 boundary correctly
|
||||||
base = ptr_user_to_base_blind(ptr);
|
// base = ptr_user_to_base_blind(ptr);
|
||||||
slab_idx = slab_index_for(ss, base);
|
slab_idx = slab_index_for(ss, ptr);
|
||||||
int ss_cap = ss_slabs_capacity(ss);
|
int ss_cap = ss_slabs_capacity(ss);
|
||||||
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
|
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
|
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
|
||||||
@ -167,8 +167,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
if (__builtin_expect(g_tiny_safe_free, 0)) {
|
||||||
size_t blk = g_tiny_class_sizes[class_idx];
|
size_t blk = g_tiny_class_sizes[class_idx];
|
||||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||||
// Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr
|
// Phase E1-CORRECT: All classes have headers, validate block base using known class_idx
|
||||||
uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base;
|
uintptr_t delta = (uintptr_t)HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)) - (uintptr_t)slab_base;
|
||||||
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
||||||
int align_ok = (delta % blk) == 0;
|
int align_ok = (delta % blk) == 0;
|
||||||
int range_ok = cap_ok && (delta / blk) < meta->capacity;
|
int range_ok = cap_ok && (delta / blk) < meta->capacity;
|
||||||
@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
if (__builtin_expect(g_debug_fast0, 0)) {
|
if (__builtin_expect(g_debug_fast0, 0)) {
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
void* prev = meta->freelist;
|
void* prev = meta->freelist;
|
||||||
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
|
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
|
||||||
meta->freelist = base;
|
meta->freelist = base;
|
||||||
@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
|
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
|
||||||
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
||||||
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
|
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
if (tiny_heap_v2_try_push(class_idx, base)) {
|
if (tiny_heap_v2_try_push(class_idx, base)) {
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
|
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
|
||||||
HAK_STAT_FREE(class_idx);
|
HAK_STAT_FREE(class_idx);
|
||||||
@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
|
|
||||||
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
|
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
int pushed = 0;
|
int pushed = 0;
|
||||||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||||||
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
|
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
|
||||||
@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
// TinyHotMag front push(8/16/32B, A/B)
|
// TinyHotMag front push(8/16/32B, A/B)
|
||||||
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
|
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
if (hotmag_push(class_idx, base)) {
|
if (hotmag_push(class_idx, base)) {
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
|
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
|
||||||
HAK_STAT_FREE(class_idx);
|
HAK_STAT_FREE(class_idx);
|
||||||
@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
}
|
}
|
||||||
if (tls->count < tls->cap) {
|
if (tls->count < tls->cap) {
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
tiny_tls_list_guard_push(class_idx, tls, base);
|
tiny_tls_list_guard_push(class_idx, tls, base);
|
||||||
tls_list_push(tls, base, class_idx);
|
tls_list_push(tls, base, class_idx);
|
||||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
|
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
|
||||||
@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
tiny_tls_list_guard_push(class_idx, tls, base);
|
tiny_tls_list_guard_push(class_idx, tls, base);
|
||||||
tls_list_push(tls, base, class_idx);
|
tls_list_push(tls, base, class_idx);
|
||||||
}
|
}
|
||||||
@ -332,8 +332,8 @@ void hak_tiny_free(void* ptr) {
|
|||||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||||
SuperSlab* ss = hak_super_lookup(ptr);
|
SuperSlab* ss = hak_super_lookup(ptr);
|
||||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||||||
int sidx = slab_index_for(ss, base);
|
int sidx = slab_index_for(ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||||
TinySlabMeta* m = &ss->slabs[sidx];
|
TinySlabMeta* m = &ss->slabs[sidx];
|
||||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||||
@ -392,8 +392,8 @@ void hak_tiny_free(void* ptr) {
|
|||||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||||
SuperSlab* ss = hak_super_lookup(ptr);
|
SuperSlab* ss = hak_super_lookup(ptr);
|
||||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||||||
int sidx = slab_index_for(ss, base);
|
int sidx = slab_index_for(ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||||
TinySlabMeta* m = &ss->slabs[sidx];
|
TinySlabMeta* m = &ss->slabs[sidx];
|
||||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||||
@ -470,8 +470,8 @@ void hak_tiny_free(void* ptr) {
|
|||||||
if (g_use_superslab) {
|
if (g_use_superslab) {
|
||||||
fast_ss = hak_super_lookup(ptr);
|
fast_ss = hak_super_lookup(ptr);
|
||||||
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
|
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||||||
int sidx = slab_index_for(fast_ss, base);
|
int sidx = slab_index_for(fast_ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
|
||||||
TinySlabMeta* m = &fast_ss->slabs[sidx];
|
TinySlabMeta* m = &fast_ss->slabs[sidx];
|
||||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||||
@ -494,8 +494,8 @@ void hak_tiny_free(void* ptr) {
|
|||||||
int ss_cls = -1, ts_cls = -1;
|
int ss_cls = -1, ts_cls = -1;
|
||||||
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
|
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
|
||||||
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
|
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||||||
int sidx = slab_index_for(chk_ss, base);
|
int sidx = slab_index_for(chk_ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
|
||||||
TinySlabMeta* m = &chk_ss->slabs[sidx];
|
TinySlabMeta* m = &chk_ss->slabs[sidx];
|
||||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||||
@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
|
|||||||
}
|
}
|
||||||
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
|
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base2 = ptr_user_to_base_blind(ptr);
|
void* base2 = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), fast_class_idx));
|
||||||
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
|
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
|
||||||
int pushed = 0;
|
int pushed = 0;
|
||||||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||||||
@ -543,8 +543,8 @@ void hak_tiny_free(void* ptr) {
|
|||||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||||
// Derive class from per-slab meta
|
// Derive class from per-slab meta
|
||||||
int cls = -1;
|
int cls = -1;
|
||||||
void* base = ptr_user_to_base_blind(ptr);
|
// void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
|
||||||
int sidx = slab_index_for(ss, base);
|
int sidx = slab_index_for(ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||||
TinySlabMeta* m = &ss->slabs[sidx];
|
TinySlabMeta* m = &ss->slabs[sidx];
|
||||||
if (m->class_idx < TINY_NUM_CLASSES) {
|
if (m->class_idx < TINY_NUM_CLASSES) {
|
||||||
|
|||||||
@ -29,6 +29,7 @@
|
|||||||
#include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check)
|
#include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check)
|
||||||
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
|
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
|
||||||
#include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing)
|
#include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing)
|
||||||
|
#include "box/ptr_conversion_box.h" // Phase 10: Correct pointer arithmetic
|
||||||
|
|
||||||
// Phase 7: Header-based ultra-fast free
|
// Phase 7: Header-based ultra-fast free
|
||||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||||
@ -57,11 +58,11 @@ static inline uint32_t tiny_self_u32_local(void) {
|
|||||||
// vs Current: 330+ lines, 500+ cycles (100x faster!)
|
// vs Current: 330+ lines, 500+ cycles (100x faster!)
|
||||||
//
|
//
|
||||||
// Assembly (x86-64, release build):
|
// Assembly (x86-64, release build):
|
||||||
// movzbl -0x1(%rdi),%eax # Read header (class_idx)
|
// movzbl -0x1(%rdi),%eax // Read header (class_idx)
|
||||||
// mov g_tls_sll_head(,%rax,8),%rdx # Load head
|
// mov g_tls_sll_head(,%rax,8),%rdx // Load head
|
||||||
// mov %rdx,(%rdi) # ptr->next = head
|
// mov %rdx,(%rdi) // ptr->next = head
|
||||||
// mov %rdi,g_tls_sll_head(,%rax,8) # head = ptr
|
// mov %rdi,g_tls_sll_head(,%rax,8) // head = ptr
|
||||||
// addl $0x1,g_tls_sll_count(,%rax,4) # count++
|
// addl $0x1,g_tls_sll_count(,%rax,4) // count++
|
||||||
// ret
|
// ret
|
||||||
//
|
//
|
||||||
// Expected: 3-5 instructions, 5-10 cycles (L1 hit)
|
// Expected: 3-5 instructions, 5-10 cycles (L1 hit)
|
||||||
@ -79,7 +80,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
// Expected: 9M → 30-50M ops/s recovery (+226-443%)
|
// Expected: 9M → 30-50M ops/s recovery (+226-443%)
|
||||||
|
|
||||||
// CRITICAL: Check if header is accessible before reading
|
// CRITICAL: Check if header is accessible before reading
|
||||||
void* header_addr = (char*)ptr - 1;
|
// FIX: Run the readability check on ptr (USER) itself instead of on ptr-1
|
||||||
|
// void* header_addr = (char*)ptr - 1; // <-- Dangerous for C0
|
||||||
|
|
||||||
#if !HAKMEM_BUILD_RELEASE
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
// Debug: Validate header accessibility (metadata-based check)
|
// Debug: Validate header accessibility (metadata-based check)
|
||||||
@ -87,7 +89,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
// Strategy: Trust internal metadata (registry ensures memory is valid)
|
// Strategy: Trust internal metadata (registry ensures memory is valid)
|
||||||
// Benefit: Catch invalid pointers via header magic validation below
|
// Benefit: Catch invalid pointers via header magic validation below
|
||||||
extern int hak_is_memory_readable(void* addr);
|
extern int hak_is_memory_readable(void* addr);
|
||||||
if (!hak_is_memory_readable(header_addr)) {
|
if (!hak_is_memory_readable(ptr)) { // Check ptr, not header_addr
|
||||||
return 0; // Header not accessible - not a Tiny allocation
|
return 0; // Header not accessible - not a Tiny allocation
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -118,9 +120,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
|
|
||||||
if (__builtin_expect(g_use_class_map, 1)) {
|
if (__builtin_expect(g_use_class_map, 1)) {
|
||||||
// P1.2: class_map path - avoid Header read
|
// P1.2: class_map path - avoid Header read
|
||||||
SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1);
|
// FIX: Use ptr (USER) for lookup, NOT ptr-1
|
||||||
|
SuperSlab* ss = ss_fast_lookup(ptr);
|
||||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||||
int slab_idx = slab_index_for(ss, (uint8_t*)ptr - 1);
|
// FIX: Use ptr (USER) for slab index
|
||||||
|
int slab_idx = slab_index_for(ss, ptr);
|
||||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||||
int map_class = tiny_get_class_from_ss(ss, slab_idx);
|
int map_class = tiny_get_class_from_ss(ss, slab_idx);
|
||||||
if (map_class < TINY_NUM_CLASSES) {
|
if (map_class < TINY_NUM_CLASSES) {
|
||||||
@ -161,9 +165,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
// Cross-check header class vs meta class (if available from fast lookup)
|
// Cross-check header class vs meta class (if available from fast lookup)
|
||||||
do {
|
do {
|
||||||
// Try fast owner slab lookup to get meta->class_idx for comparison
|
// Try fast owner slab lookup to get meta->class_idx for comparison
|
||||||
SuperSlab* ss = hak_super_lookup((uint8_t*)ptr - 1);
|
// FIX: Use ptr (USER)
|
||||||
|
SuperSlab* ss = hak_super_lookup(ptr);
|
||||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||||
int sidx = slab_index_for(ss, (uint8_t*)ptr - 1);
|
// FIX: Use ptr (USER)
|
||||||
|
int sidx = slab_index_for(ss, ptr);
|
||||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||||
TinySlabMeta* m = &ss->slabs[sidx];
|
TinySlabMeta* m = &ss->slabs[sidx];
|
||||||
uint8_t meta_cls = m->class_idx;
|
uint8_t meta_cls = m->class_idx;
|
||||||
@ -217,7 +223,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
// 3. Push base to TLS freelist (4 instructions, 5-7 cycles)
|
// 3. Push base to TLS freelist (4 instructions, 5-7 cycles)
|
||||||
// Must push base (block start) not user pointer!
|
// Must push base (block start) not user pointer!
|
||||||
// Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1
|
// Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1
|
||||||
void* base = (char*)ptr - 1;
|
// FIX: Use ptr_user_to_base(ptr, class_idx) logic
|
||||||
|
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
|
||||||
|
|
||||||
// Phase 14-C: UltraHot は free 時に横取りしない(Borrowing 設計)
|
// Phase 14-C: UltraHot は free 時に横取りしない(Borrowing 設計)
|
||||||
// → 正史(TLS SLL)の在庫を正しく保つ
|
// → 正史(TLS SLL)の在庫を正しく保つ
|
||||||
@ -237,6 +244,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
|
|||||||
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
||||||
SuperSlab* ss = ss_fast_lookup(base);
|
SuperSlab* ss = ss_fast_lookup(base);
|
||||||
if (__builtin_expect(ss != NULL, 1)) {
|
if (__builtin_expect(ss != NULL, 1)) {
|
||||||
|
// FIX: slab_index_for on BASE (since base is correct now)
|
||||||
int slab_idx = slab_index_for(ss, base);
|
int slab_idx = slab_index_for(ss, base);
|
||||||
if (__builtin_expect(slab_idx >= 0, 1)) {
|
if (__builtin_expect(slab_idx >= 0, 1)) {
|
||||||
uint32_t self_tid = tiny_self_u32_local();
|
uint32_t self_tid = tiny_self_u32_local();
|
||||||
|
|||||||
@ -127,6 +127,7 @@
|
|||||||
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
|
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
|
||||||
// Direct freelist push (same as old hak_tiny_free_superslab)
|
// Direct freelist push (same as old hak_tiny_free_superslab)
|
||||||
// Phase 10: it.ptr is BASE.
|
// Phase 10: it.ptr is BASE.
|
||||||
|
// FIX: it.ptr is BASE, use it directly (do not subtract 1)
|
||||||
void* base = it.ptr;
|
void* base = it.ptr;
|
||||||
int slab_idx = slab_index_for(owner_ss, base);
|
int slab_idx = slab_index_for(owner_ss, base);
|
||||||
// BUGFIX: Validate slab_idx before array access (prevents OOB)
|
// BUGFIX: Validate slab_idx before array access (prevents OOB)
|
||||||
@ -320,8 +321,8 @@
|
|||||||
SuperSlab* ss_owner = hak_super_lookup(it.ptr);
|
SuperSlab* ss_owner = hak_super_lookup(it.ptr);
|
||||||
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
|
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
|
||||||
// SuperSlab spill - return to freelist
|
// SuperSlab spill - return to freelist
|
||||||
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
|
// FIX: it.ptr is BASE, use directly
|
||||||
void* base = (void*)((uint8_t*)it.ptr - 1);
|
void* base = it.ptr;
|
||||||
int slab_idx = slab_index_for(ss_owner, base);
|
int slab_idx = slab_index_for(ss_owner, base);
|
||||||
// BUGFIX: Validate slab_idx before array access (prevents OOB)
|
// BUGFIX: Validate slab_idx before array access (prevents OOB)
|
||||||
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) {
|
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) {
|
||||||
@ -430,7 +431,7 @@
|
|||||||
mag->top++;
|
mag->top++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT
|
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
|
||||||
// Phase 10: Use hak_base_ptr_t
|
// Phase 10: Use hak_base_ptr_t
|
||||||
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
||||||
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
|
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
|
||||||
@ -467,7 +468,7 @@
|
|||||||
mag->top++;
|
mag->top++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT
|
} else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
|
||||||
// Phase 10: Use hak_base_ptr_t
|
// Phase 10: Use hak_base_ptr_t
|
||||||
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
|
||||||
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
|
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
|
||||||
@ -497,7 +498,8 @@
|
|||||||
return;
|
return;
|
||||||
} else if (slab) {
|
} else if (slab) {
|
||||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||||
void* base = (void*)((uint8_t*)ptr - 1);
|
// FIX: Convert USER → BASE via hak_user_to_base instead of hand-written ptr - 1
|
||||||
|
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
|
||||||
tiny_remote_push(slab, base);
|
tiny_remote_push(slab, base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user