Fix ptr_user_to_base_blind regression: use class-aware base calculation and correct slab index lookup

This commit is contained in:
Moe Charm (CI)
2025-12-03 12:29:31 +09:00
parent c2716f5c01
commit c91602f181
4 changed files with 65 additions and 55 deletions

View File

@ -251,7 +251,7 @@ endif
# Benchmark targets # Benchmark targets
BENCH_HAKMEM = bench_allocators_hakmem BENCH_HAKMEM = bench_allocators_hakmem
BENCH_SYSTEM = bench_allocators_system BENCH_SYSTEM = bench_allocators_system
BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o 
hakmem_smallmid.o hakmem_smallmid_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/pagefault_telemetry_box.o core/box/tiny_sizeclass_hist_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o bench_allocators_hakmem.o
BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1) ifeq ($(POOL_TLS_PHASE1),1)
BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
@ -428,7 +428,7 @@ test-box-refactor: box-refactor
./larson_hakmem 10 8 128 1024 1 12345 4 ./larson_hakmem 10 8 128 1024 1 12345 4
# Phase 4: Tiny Pool benchmarks (properly linked with hakmem) # Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o superslab_allocate.o superslab_stats.o superslab_cache.o superslab_ace.o superslab_slab.o superslab_backend.o superslab_head.o hakmem_smallmid.o 
hakmem_smallmid_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/unified_batch_box.o core/box/prewarm_box.o core/box/ss_hot_prewarm_box.o core/box/front_metrics_box.o core/box/bench_fast_box.o core/box/ss_addr_map_box.o core/box/ss_tls_hint_box.o core/box/slab_recycling_box.o core/box/tiny_sizeclass_hist_box.o core/box/pagefault_telemetry_box.o core/box/tiny_env_box.o core/box/wrapper_env_box.o core/page_arena.o core/front/tiny_unified_cache.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_super_registry.o hakmem_shared_pool.o hakmem_shared_pool_acquire.o hakmem_shared_pool_release.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/tiny_alloc_fast_push.o core/link_stubs.o core/tiny_failfast.o
TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
ifeq ($(POOL_TLS_PHASE1),1) ifeq ($(POOL_TLS_PHASE1),1)
TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o

View File

@ -125,8 +125,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) return; if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
// Derive class_idx from per-slab metadata instead of ss->size_class // Derive class_idx from per-slab metadata instead of ss->size_class
int class_idx = -1; int class_idx = -1;
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr (USER) directly
int slab_idx = slab_index_for(ss, base); int slab_idx = slab_index_for(ss, ptr); // FIX: slab_index_for works better with ptr (USER) for C0/C7
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) { if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
TinySlabMeta* meta_probe = &ss->slabs[slab_idx]; TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
if (meta_probe->class_idx < TINY_NUM_CLASSES) { if (meta_probe->class_idx < TINY_NUM_CLASSES) {
@ -155,9 +155,9 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
} }
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0); tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
// Detect cross-thread: cross-thread free MUST go via superslab path // Detect cross-thread: cross-thread free MUST go via superslab path
// FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation // FIX: Use ptr (USER) for slab index calculation to handle C0/C7 boundary correctly
base = ptr_user_to_base_blind(ptr); // base = ptr_user_to_base_blind(ptr);
slab_idx = slab_index_for(ss, base); slab_idx = slab_index_for(ss, ptr);
int ss_cap = ss_slabs_capacity(ss); int ss_cap = ss_slabs_capacity(ss);
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) { if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx); tiny_debug_ring_record(TINY_RING_EVENT_SUPERSLAB_ADOPT_FAIL, (uint16_t)0xFEu, ss, (uintptr_t)slab_idx);
@ -167,8 +167,8 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (__builtin_expect(g_tiny_safe_free, 0)) { if (__builtin_expect(g_tiny_safe_free, 0)) {
size_t blk = g_tiny_class_sizes[class_idx]; size_t blk = g_tiny_class_sizes[class_idx];
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx); uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
// Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr // Phase E1-CORRECT: All classes have headers, validate block base using known class_idx
uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base; uintptr_t delta = (uintptr_t)HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx)) - (uintptr_t)slab_base;
int cap_ok = (meta->capacity > 0) ? 1 : 0; int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0; int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity; int range_ok = cap_ok && (delta / blk) < meta->capacity;
@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (__builtin_expect(g_debug_fast0, 0)) { if (__builtin_expect(g_debug_fast0, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx); tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
void* prev = meta->freelist; void* prev = meta->freelist;
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
meta->freelist = base; meta->freelist = base;
@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// Front-V2: try to return to TLS magazine first (A/B, default OFF) // Front-V2: try to return to TLS magazine first (A/B, default OFF)
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode // Phase 7-Step8: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) { if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
if (tiny_heap_v2_try_push(class_idx, base)) { if (tiny_heap_v2_try_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx); tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
HAK_STAT_FREE(class_idx); HAK_STAT_FREE(class_idx);
@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (g_fast_enable && g_fast_cap[class_idx] != 0) { if (g_fast_enable && g_fast_cap[class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
int pushed = 0; int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode // Phase 7-Step5: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) { if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// TinyHotMag front push (8/16/32B, A/B) // TinyHotMag front push (8/16/32B, A/B)
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) { if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
if (hotmag_push(class_idx, base)) { if (hotmag_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1); tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
HAK_STAT_FREE(class_idx); HAK_STAT_FREE(class_idx);
@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
} }
if (tls->count < tls->cap) { if (tls->count < tls->cap) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
tiny_tls_list_guard_push(class_idx, tls, base); tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx); tls_list_push(tls, base, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0); tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
} }
{ {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = ptr_user_to_base_blind(ptr); void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
tiny_tls_list_guard_push(class_idx, tls, base); tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx); tls_list_push(tls, base, class_idx);
} }
@ -332,8 +332,8 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class // Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr); SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) { if (ss && ss->magic == SUPERSLAB_MAGIC) {
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
int sidx = slab_index_for(ss, base); int sidx = slab_index_for(ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx]; TinySlabMeta* m = &ss->slabs[sidx];
if (m->class_idx < TINY_NUM_CLASSES) { if (m->class_idx < TINY_NUM_CLASSES) {
@ -392,8 +392,8 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class // Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr); SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) { if (ss && ss->magic == SUPERSLAB_MAGIC) {
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
int sidx = slab_index_for(ss, base); int sidx = slab_index_for(ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx]; TinySlabMeta* m = &ss->slabs[sidx];
if (m->class_idx < TINY_NUM_CLASSES) { if (m->class_idx < TINY_NUM_CLASSES) {
@ -470,8 +470,8 @@ void hak_tiny_free(void* ptr) {
if (g_use_superslab) { if (g_use_superslab) {
fast_ss = hak_super_lookup(ptr); fast_ss = hak_super_lookup(ptr);
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) { if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
int sidx = slab_index_for(fast_ss, base); int sidx = slab_index_for(fast_ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
TinySlabMeta* m = &fast_ss->slabs[sidx]; TinySlabMeta* m = &fast_ss->slabs[sidx];
if (m->class_idx < TINY_NUM_CLASSES) { if (m->class_idx < TINY_NUM_CLASSES) {
@ -494,8 +494,8 @@ void hak_tiny_free(void* ptr) {
int ss_cls = -1, ts_cls = -1; int ss_cls = -1, ts_cls = -1;
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL); SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) { if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
int sidx = slab_index_for(chk_ss, base); int sidx = slab_index_for(chk_ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
TinySlabMeta* m = &chk_ss->slabs[sidx]; TinySlabMeta* m = &chk_ss->slabs[sidx];
if (m->class_idx < TINY_NUM_CLASSES) { if (m->class_idx < TINY_NUM_CLASSES) {
@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
} }
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) { if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base2 = ptr_user_to_base_blind(ptr); void* base2 = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), fast_class_idx));
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct) // PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
int pushed = 0; int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode // Phase 7-Step5: Use config macro for dead code elimination in PGO mode
@ -543,8 +543,8 @@ void hak_tiny_free(void* ptr) {
if (ss && ss->magic == SUPERSLAB_MAGIC) { if (ss && ss->magic == SUPERSLAB_MAGIC) {
// Derive class from per-slab meta // Derive class from per-slab meta
int cls = -1; int cls = -1;
void* base = ptr_user_to_base_blind(ptr); // void* base = ptr_user_to_base_blind(ptr); // FIX: Use ptr
int sidx = slab_index_for(ss, base); int sidx = slab_index_for(ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx]; TinySlabMeta* m = &ss->slabs[sidx];
if (m->class_idx < TINY_NUM_CLASSES) { if (m->class_idx < TINY_NUM_CLASSES) {

View File

@ -29,6 +29,7 @@
#include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check) #include "superslab/superslab_inline.h" // For slab_index_for (cross-thread check)
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary #include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
#include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing) #include "box/free_remote_box.h" // For tiny_free_remote_box (cross-thread routing)
#include "box/ptr_conversion_box.h" // Phase 10: Correct pointer arithmetic
// Phase 7: Header-based ultra-fast free // Phase 7: Header-based ultra-fast free
#if HAKMEM_TINY_HEADER_CLASSIDX #if HAKMEM_TINY_HEADER_CLASSIDX
@ -57,11 +58,11 @@ static inline uint32_t tiny_self_u32_local(void) {
// vs Current: 330+ lines, 500+ cycles (100x faster!) // vs Current: 330+ lines, 500+ cycles (100x faster!)
// //
// Assembly (x86-64, release build): // Assembly (x86-64, release build):
// movzbl -0x1(%rdi),%eax # Read header (class_idx) // movzbl -0x1(%rdi),%eax // Read header (class_idx)
// mov g_tls_sll_head(,%rax,8),%rdx # Load head // mov g_tls_sll_head(,%rax,8),%rdx // Load head
// mov %rdx,(%rdi) # ptr->next = head // mov %rdx,(%rdi) // ptr->next = head
// mov %rdi,g_tls_sll_head(,%rax,8) # head = ptr // mov %rdi,g_tls_sll_head(,%rax,8) // head = ptr
// addl $0x1,g_tls_sll_count(,%rax,4) # count++ // addl $0x1,g_tls_sll_count(,%rax,4) // count++
// ret // ret
// //
// Expected: 3-5 instructions, 5-10 cycles (L1 hit) // Expected: 3-5 instructions, 5-10 cycles (L1 hit)
@ -79,7 +80,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Expected: 9M → 30-50M ops/s recovery (+226-443%) // Expected: 9M → 30-50M ops/s recovery (+226-443%)
// CRITICAL: Check if header is accessible before reading // CRITICAL: Check if header is accessible before reading
void* header_addr = (char*)ptr - 1; // FIX: Use ptr directly, not ptr-1, for validation if possible, or trust lookup
// void* header_addr = (char*)ptr - 1; // <-- Dangerous for C0
#if !HAKMEM_BUILD_RELEASE #if !HAKMEM_BUILD_RELEASE
// Debug: Validate header accessibility (metadata-based check) // Debug: Validate header accessibility (metadata-based check)
@ -87,7 +89,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Strategy: Trust internal metadata (registry ensures memory is valid) // Strategy: Trust internal metadata (registry ensures memory is valid)
// Benefit: Catch invalid pointers via header magic validation below // Benefit: Catch invalid pointers via header magic validation below
extern int hak_is_memory_readable(void* addr); extern int hak_is_memory_readable(void* addr);
if (!hak_is_memory_readable(header_addr)) { if (!hak_is_memory_readable(ptr)) { // Check ptr, not header_addr
return 0; // Header not accessible - not a Tiny allocation return 0; // Header not accessible - not a Tiny allocation
} }
#else #else
@ -118,9 +120,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
if (__builtin_expect(g_use_class_map, 1)) { if (__builtin_expect(g_use_class_map, 1)) {
// P1.2: class_map path - avoid Header read // P1.2: class_map path - avoid Header read
SuperSlab* ss = ss_fast_lookup((uint8_t*)ptr - 1); // FIX: Use ptr (USER) for lookup, NOT ptr-1
SuperSlab* ss = ss_fast_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) { if (ss && ss->magic == SUPERSLAB_MAGIC) {
int slab_idx = slab_index_for(ss, (uint8_t*)ptr - 1); // FIX: Use ptr (USER) for slab index
int slab_idx = slab_index_for(ss, ptr);
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) { if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
int map_class = tiny_get_class_from_ss(ss, slab_idx); int map_class = tiny_get_class_from_ss(ss, slab_idx);
if (map_class < TINY_NUM_CLASSES) { if (map_class < TINY_NUM_CLASSES) {
@ -161,9 +165,11 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Cross-check header class vs meta class (if available from fast lookup) // Cross-check header class vs meta class (if available from fast lookup)
do { do {
// Try fast owner slab lookup to get meta->class_idx for comparison // Try fast owner slab lookup to get meta->class_idx for comparison
SuperSlab* ss = hak_super_lookup((uint8_t*)ptr - 1); // FIX: Use ptr (USER)
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) { if (ss && ss->magic == SUPERSLAB_MAGIC) {
int sidx = slab_index_for(ss, (uint8_t*)ptr - 1); // FIX: Use ptr (USER)
int sidx = slab_index_for(ss, ptr);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) { if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx]; TinySlabMeta* m = &ss->slabs[sidx];
uint8_t meta_cls = m->class_idx; uint8_t meta_cls = m->class_idx;
@ -217,7 +223,8 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// 3. Push base to TLS freelist (4 instructions, 5-7 cycles) // 3. Push base to TLS freelist (4 instructions, 5-7 cycles)
// Must push base (block start) not user pointer! // Must push base (block start) not user pointer!
// Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1 // Phase E1: ALL classes (C0-C7) have 1-byte header → base = ptr-1
void* base = (char*)ptr - 1; // FIX: Use ptr_user_to_base(ptr, class_idx) logic
void* base = HAK_BASE_TO_RAW(ptr_user_to_base(HAK_USER_FROM_RAW(ptr), class_idx));
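// Phase 14-C: UltraHot does not intercept blocks at free time (Borrowing design) // Phase 14-C: UltraHot does not intercept blocks at free time (Borrowing design)
// → keeps the inventory of the canonical store (TLS SLL) accurate // → keeps the inventory of the canonical store (TLS SLL) accurate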
@ -237,6 +244,7 @@ static inline int hak_tiny_free_fast_v2(void* ptr) {
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100) // Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
SuperSlab* ss = ss_fast_lookup(base); SuperSlab* ss = ss_fast_lookup(base);
if (__builtin_expect(ss != NULL, 1)) { if (__builtin_expect(ss != NULL, 1)) {
// FIX: slab_index_for on BASE (since base is correct now)
int slab_idx = slab_index_for(ss, base); int slab_idx = slab_index_for(ss, base);
if (__builtin_expect(slab_idx >= 0, 1)) { if (__builtin_expect(slab_idx >= 0, 1)) {
uint32_t self_tid = tiny_self_u32_local(); uint32_t self_tid = tiny_self_u32_local();

View File

@ -127,6 +127,7 @@
if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) { if (owner_ss && owner_ss->magic == SUPERSLAB_MAGIC) {
// Direct freelist push (same as old hak_tiny_free_superslab) // Direct freelist push (same as old hak_tiny_free_superslab)
// Phase 10: it.ptr is BASE. // Phase 10: it.ptr is BASE.
// FIX: it.ptr is BASE, use it directly (do not subtract 1)
void* base = it.ptr; void* base = it.ptr;
int slab_idx = slab_index_for(owner_ss, base); int slab_idx = slab_index_for(owner_ss, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB) // BUGFIX: Validate slab_idx before array access (prevents OOB)
@ -320,8 +321,8 @@
SuperSlab* ss_owner = hak_super_lookup(it.ptr); SuperSlab* ss_owner = hak_super_lookup(it.ptr);
if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) { if (ss_owner && ss_owner->magic == SUPERSLAB_MAGIC) {
// SuperSlab spill - return to freelist // SuperSlab spill - return to freelist
// FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation // FIX: it.ptr is BASE, use directly
void* base = (void*)((uint8_t*)it.ptr - 1); void* base = it.ptr;
int slab_idx = slab_index_for(ss_owner, base); int slab_idx = slab_index_for(ss_owner, base);
// BUGFIX: Validate slab_idx before array access (prevents OOB) // BUGFIX: Validate slab_idx before array access (prevents OOB)
if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) { if (slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss_owner)) {
@ -430,7 +431,7 @@
mag->top++; mag->top++;
} }
} }
} else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT } else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
// Phase 10: Use hak_base_ptr_t // Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr)); hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr); mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
@ -467,7 +468,7 @@
mag->top++; mag->top++;
} }
} }
} else if (!tiny_optional_push(class_idx, (void*)((uint8_t*)ptr - 1))) { // Phase E1-CORRECT } else if (!tiny_optional_push(class_idx, HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr))))) { // FIX: use ptr_user_to_base
// Phase 10: Use hak_base_ptr_t // Phase 10: Use hak_base_ptr_t
hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr)); hak_base_ptr_t base_ptr = hak_user_to_base(HAK_USER_FROM_RAW(ptr));
mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr); mag->items[mag->top].ptr = HAK_BASE_TO_RAW(base_ptr);
@ -497,7 +498,8 @@
return; return;
} else if (slab) { } else if (slab) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1); // FIX: Use ptr_user_to_base to get correct base
void* base = HAK_BASE_TO_RAW(hak_user_to_base(HAK_USER_FROM_RAW(ptr)));
tiny_remote_push(slab, base); tiny_remote_push(slab, base);
} }
} }