diff --git a/Makefile b/Makefile
index 662916c7..82c01cf5 100644
--- a/Makefile
+++ b/Makefile
@@ -368,7 +368,7 @@ test-box-refactor: box-refactor
 	./larson_hakmem 10 8 128 1024 1 12345 4
 
 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem)
-TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o
+TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o
 TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE)
 ifeq ($(POOL_TLS_PHASE1),1)
 TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o
diff --git a/core/box/front_gate_classifier.c b/core/box/front_gate_classifier.c
index 1f1d8405..f1c3b764 100644
--- a/core/box/front_gate_classifier.c
+++ b/core/box/front_gate_classifier.c
@@ -63,6 +63,10 @@ static void __attribute__((destructor)) front_gate_stats_destructor(void) {
 //
 // Performance: 2-3 cycles (L1 cache hit)
 static inline int safe_header_probe(void* ptr) {
+    // Reject obviously invalid/sentinel-sized pointers (defense-in-depth)
+    if ((uintptr_t)ptr < 4096) {
+        return -1;
+    }
     // Safety check: header must be in same page as ptr
     uintptr_t offset_in_page = (uintptr_t)ptr & 0xFFF;
     if (offset_in_page == 0) {
@@ -140,9 +144,13 @@ static inline ptr_classification_t registry_lookup(void* ptr) {
         return result;
     }
 
-    // Valid Tiny allocation (headerless)
-    // Note: C7 (1KB) is the only headerless class, but Registry handles all
-    result.kind = PTR_KIND_TINY_HEADERLESS;
+    // Valid Tiny allocation
+    // Only class 7 (1KB) is headerless. Other classes use header-based free path.
+    if (ss->size_class == 7) {
+        result.kind = PTR_KIND_TINY_HEADERLESS;
+    } else {
+        result.kind = PTR_KIND_TINY_HEADER;
+    }
     return result;
 }
 
@@ -177,6 +185,11 @@ ptr_classification_t classify_ptr(void* ptr) {
     };
     if (!ptr) return result;
 
+    // Early guard: reject non-canonical tiny integers to avoid ptr-1 probe crashes
+    if ((uintptr_t)ptr < 4096) {
+        result.kind = PTR_KIND_UNKNOWN;
+        return result;
+    }
 
     // Step 1: Try safe header probe (C0-C6 fast path: 5-10 cycles)
     // Skip header probe on 1KB-aligned pointers to avoid misclassifying C7/headerless
@@ -186,13 +199,22 @@ ptr_classification_t classify_ptr(void* ptr) {
     }
     if (class_idx >= 0) {
         // Header found - C0-C6 with header
-        result.kind = PTR_KIND_TINY_HEADER;
-        result.class_idx = class_idx;
+        // Additional safety: verify pointer belongs to a SuperSlab region.
+        // This avoids rare false positives where random header bytes look like 0xA0.
+        struct SuperSlab* ss_chk = hak_super_lookup(ptr);
+        if (!ss_chk) {
+            // Not in Tiny registry; treat as UNKNOWN and continue
+            // (fall back to later checks)
+        } else {
+            result.kind = PTR_KIND_TINY_HEADER;
+            result.class_idx = class_idx;
+            result.ss = ss_chk;
 #if !HAKMEM_BUILD_RELEASE
         g_classify_header_hit++;
 #endif
-        return result;
+            return result;
+        }
     }
 
     // Step 2: Check Pool TLS (before Registry to avoid false positives)
diff --git a/core/box/tls_sll_box.h b/core/box/tls_sll_box.h
index 3f51f35c..b340588b 100644
--- a/core/box/tls_sll_box.h
+++ b/core/box/tls_sll_box.h
@@ -64,8 +64,13 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) {
     // Phase 7 carve operations return base (stride includes header)
     // SLL stores base to avoid overwriting header with next pointer
 
-    // Push to SLL (standard linked list push using base)
-    *(void**)ptr = g_tls_sll_head[class_idx];
+    // Phase 7: Store next pointer at header-safe offset (base+1 for C0-C6)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = 1;  // C7 is rejected above; always skip header
+#else
+    const size_t next_offset = 0;
+#endif
+    *(void**)((uint8_t*)ptr + next_offset) = g_tls_sll_head[class_idx];
     g_tls_sll_head[class_idx] = ptr;
     g_tls_sll_count[class_idx]++;
 
@@ -95,7 +100,13 @@ static inline bool tls_sll_pop(int class_idx, void** out) {
     }
 
     // Pop from SLL (reads next from base)
-    void* next = *(void**)base;
+    // Phase 7: Read next pointer at header-safe offset
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
+    void* next = *(void**)((uint8_t*)base + next_offset);
     g_tls_sll_head[class_idx] = next;
     if (g_tls_sll_count[class_idx] > 0) {
         g_tls_sll_count[class_idx]--;
@@ -152,8 +163,13 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t
     // Find chain tail (traverse to_move - 1 nodes)
     // NOTE: Chain MUST be linked using base pointers (caller responsibility)
     void* tail = chain_head;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = 1;  // Chain is built from header-safe links (C7 rejected)
+#else
+    const size_t next_offset = 0;
+#endif
     for (uint32_t i = 1; i < to_move; i++) {
-        void* next = *(void**)tail;
+        void* next = *(void**)((uint8_t*)tail + next_offset);
         if (!next) {
             // Chain shorter than expected, adjust to_move
             to_move = i;
@@ -163,7 +179,7 @@
     }
 
     // Splice chain to SLL head
-    *(void**)tail = g_tls_sll_head[class_idx];
+    *(void**)((uint8_t*)tail + next_offset) = g_tls_sll_head[class_idx];
     g_tls_sll_head[class_idx] = chain_head;
     g_tls_sll_count[class_idx] += to_move;
diff --git a/core/hakmem_tiny_fastcache.inc.h b/core/hakmem_tiny_fastcache.inc.h
index 73c62759..4c397ad8 100644
--- a/core/hakmem_tiny_fastcache.inc.h
+++ b/core/hakmem_tiny_fastcache.inc.h
@@ -79,7 +79,13 @@ static inline __attribute__((always_inline)) void* tiny_fast_pop(int class_idx)
     if (cap == 0) return NULL;
     void* head = g_fast_head[class_idx];
     if (!head) return NULL;
-    void* next = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
+    void* next = *(void**)((uint8_t*)head + next_offset);
     g_fast_head[class_idx] = next;
     uint16_t count = g_fast_count[class_idx];
     if (count > 0) {
@@ -112,7 +118,13 @@ static inline __attribute__((always_inline)) int tiny_fast_push(int class_idx, v
         tiny_fast_debug_log(class_idx, "full", count, cap);
         return 0;
     }
-    *(void**)ptr = g_fast_head[class_idx];
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset2 = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset2 = 0;
+#endif
+    *(void**)((uint8_t*)ptr + next_offset2) = g_fast_head[class_idx];
     g_fast_head[class_idx] = ptr;
     g_fast_count[class_idx] = (uint16_t)(count + 1);
     g_fast_push_hits[class_idx]++;
diff --git a/core/hakmem_tiny_hot_pop.inc.h b/core/hakmem_tiny_hot_pop.inc.h
index d9795add..66e72ab9 100644
--- a/core/hakmem_tiny_hot_pop.inc.h
+++ b/core/hakmem_tiny_hot_pop.inc.h
@@ -39,13 +39,20 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class0(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[0];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[0] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1, C7: base)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off0 = 1;  // class 0 is headered
+#else
+    const size_t next_off0 = 0;
+#endif
+    g_fast_head[0] = *(void**)((uint8_t*)head + next_off0);
     uint16_t count = g_fast_count[0];
     if (count > 0) {
         g_fast_count[0] = (uint16_t)(count - 1);
     } else {
         g_fast_count[0] = 0;
     }
+    // No C7 here (class 0), just return base
     return head;
 }
 
@@ -61,7 +68,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class1(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[1];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[1] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off1 = 1;
+#else
+    const size_t next_off1 = 0;
+#endif
+    g_fast_head[1] = *(void**)((uint8_t*)head + next_off1);
     uint16_t count = g_fast_count[1];
     if (count > 0) {
         g_fast_count[1] = (uint16_t)(count - 1);
@@ -83,7 +96,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class2(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[2];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[2] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off2 = 1;
+#else
+    const size_t next_off2 = 0;
+#endif
+    g_fast_head[2] = *(void**)((uint8_t*)head + next_off2);
     uint16_t count = g_fast_count[2];
     if (count > 0) {
         g_fast_count[2] = (uint16_t)(count - 1);
@@ -105,7 +124,13 @@ static inline __attribute__((always_inline)) void* tiny_hot_pop_class3(void) {
     if (__builtin_expect(cap == 0, 0)) return NULL;
     void* head = g_fast_head[3];
     if (__builtin_expect(head == NULL, 0)) return NULL;
-    g_fast_head[3] = *(void**)head;
+    // Phase 7: header-aware next pointer (C0-C6: base+1)
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off3 = 1;
+#else
+    const size_t next_off3 = 0;
+#endif
+    g_fast_head[3] = *(void**)((uint8_t*)head + next_off3);
     uint16_t count = g_fast_count[3];
     if (count > 0) {
         g_fast_count[3] = (uint16_t)(count - 1);
diff --git a/core/hakmem_tiny_refill_p0.inc.h b/core/hakmem_tiny_refill_p0.inc.h
index ac512c58..924ac59c 100644
--- a/core/hakmem_tiny_refill_p0.inc.h
+++ b/core/hakmem_tiny_refill_p0.inc.h
@@ -310,7 +310,7 @@ static inline int sll_refill_batch_from_ss(int class_idx, int max_take) {
     }
 
     TinyRefillChain carve;
-    trc_linear_carve(slab_base, bs, meta, batch, &carve);
+    trc_linear_carve(slab_base, bs, meta, batch, class_idx, &carve);
     trc_splice_to_sll(class_idx, &carve, &g_tls_sll_head[class_idx], &g_tls_sll_count[class_idx]);
 
     // FIX: Update SuperSlab active counter (was missing!)
     ss_active_add(tls->ss, batch);
diff --git a/core/link_stubs.c b/core/link_stubs.c
index 6de02747..18866ee2 100644
--- a/core/link_stubs.c
+++ b/core/link_stubs.c
@@ -7,6 +7,15 @@
 
 __attribute__((weak)) void hak_tiny_prewarm_tls_cache(void) {}
 
+// Weak stubs for remote tracking (avoid LTO link errors when tiny_remote.c is GC'ed)
+struct SuperSlab;  // forward decl to avoid heavy includes
+__attribute__((weak)) void tiny_remote_track_on_local_free(struct SuperSlab* ss, int slab_idx, void* node, const char* stage, unsigned int tid) {
+    (void)ss; (void)slab_idx; (void)node; (void)stage; (void)tid;
+}
+__attribute__((weak)) void tiny_remote_track_expect_alloc(struct SuperSlab* ss, int slab_idx, void* node, const char* stage, unsigned int tid) {
+    (void)ss; (void)slab_idx; (void)node; (void)stage; (void)tid;
+}
+
 __attribute__((weak)) void* pool_alloc(size_t size) {
     // Fallback to malloc if Pool TLS not linked
     return malloc(size);
@@ -16,4 +25,3 @@ __attribute__((weak)) void pool_free(void* ptr) {
     // Fallback to free if Pool TLS not linked
     free(ptr);
 }
-
diff --git a/core/link_stubs.d b/core/link_stubs.d
new file mode 100644
index 00000000..b50d270f
--- /dev/null
+++ b/core/link_stubs.d
@@ -0,0 +1 @@
+core/link_stubs.o: core/link_stubs.c
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index eaf577eb..23eab173 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -272,8 +272,13 @@ static inline int sfc_refill_from_sll(int class_idx, int target_count) {
             break;  // SLL empty
         }
 
-        // Push to SFC (Layer 0)
-        *(void**)ptr = g_sfc_head[class_idx];
+        // Push to SFC (Layer 0) — header-aware
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t sfc_next_off = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t sfc_next_off = 0;
+#endif
+        *(void**)((uint8_t*)ptr + sfc_next_off) = g_sfc_head[class_idx];
         g_sfc_head[class_idx] = ptr;
         g_sfc_count[class_idx]++;
diff --git a/core/tiny_alloc_fast_sfc.inc.h b/core/tiny_alloc_fast_sfc.inc.h
index 5be18a6a..1c56d163 100644
--- a/core/tiny_alloc_fast_sfc.inc.h
+++ b/core/tiny_alloc_fast_sfc.inc.h
@@ -75,18 +75,23 @@ extern sfc_stats_t g_sfc_stats[TINY_NUM_CLASSES];
 // Contract: Caller owns returned pointer
 // Invariants: count ≥ 0, all pointers belong to correct class
 static inline void* sfc_alloc(int cls) {
-    void* head = g_sfc_head[cls];
+    void* base = g_sfc_head[cls];
 
-    if (__builtin_expect(head != NULL, 1)) {
-        // Pop: 3 instructions (mimalloc/tcache style)
-        g_sfc_head[cls] = *(void**)head;  // next = *head
+    if (__builtin_expect(base != NULL, 1)) {
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_offset = (cls == 7) ? 0 : 1;
+#else
+        const size_t next_offset = 0;
+#endif
+        // Pop: header-aware next
+        g_sfc_head[cls] = *(void**)((uint8_t*)base + next_offset);
         g_sfc_count[cls]--;  // count--
 
 #if HAKMEM_DEBUG_COUNTERS
         g_sfc_stats[cls].alloc_hits++;
 #endif
 
-        return head;  // 🚀 SFC HIT!
+        return base;  // 🚀 SFC HIT! (returns base)
     }
 
 #if HAKMEM_DEBUG_COUNTERS
@@ -114,9 +119,14 @@ static inline int sfc_free_push(int cls, void* ptr) {
     }
 
     if (__builtin_expect(cnt < cap, 1)) {
-        // Push: 3 instructions
-        *(void**)ptr = g_sfc_head[cls];  // *ptr = head
-        g_sfc_head[cls] = ptr;  // head = ptr
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_offset = (cls == 7) ? 0 : 1;
+#else
+        const size_t next_offset = 0;
+#endif
+        // Push: header-aware next placement
+        *(void**)((uint8_t*)ptr + next_offset) = g_sfc_head[cls];
+        g_sfc_head[cls] = ptr;  // head = base
         g_sfc_count[cls] = cnt + 1;  // count++
 
 #if HAKMEM_DEBUG_COUNTERS
diff --git a/core/tiny_free_magazine.inc.h b/core/tiny_free_magazine.inc.h
index fd5c2c55..4b4e8fc2 100644
--- a/core/tiny_free_magazine.inc.h
+++ b/core/tiny_free_magazine.inc.h
@@ -75,12 +75,22 @@
     if (limit > cap/2) limit = cap/2;
     if (limit > 32) limit = 32;  // keep free-path bounded
     void* head = (class_idx == 7) ? ptr : (void*)((uint8_t*)ptr - 1);
-    *(void**)head = NULL;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_off = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_off = 0;
+#endif
+    *(void**)((uint8_t*)head + next_off) = NULL;
     void* tail = head;  // current tail
     int taken = 1;
     while (taken < limit && mag->top > 0) {
         void* p2 = mag->items[--mag->top].ptr;
-        *(void**)p2 = head;
+#if HAKMEM_TINY_HEADER_CLASSIDX
+        const size_t next_off2 = (class_idx == 7) ? 0 : 1;
+#else
+        const size_t next_off2 = 0;
+#endif
+        *(void**)((uint8_t*)p2 + next_off2) = head;
         head = p2;
         taken++;
     }
diff --git a/core/tiny_refill_opt.h b/core/tiny_refill_opt.h
index ee12d0ca..67e74014 100644
--- a/core/tiny_refill_opt.h
+++ b/core/tiny_refill_opt.h
@@ -41,11 +41,17 @@ static inline void refill_opt_dbg(const char* stage, int class_idx, uint32_t n)
 #endif
 }
 
-static inline void trc_push_front(TinyRefillChain* c, void* node) {
+// Phase 7 header-aware push_front: link using base+1 for C0-C6 (C7 not used here)
+static inline void trc_push_front(TinyRefillChain* c, void* node, int class_idx) {
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
     if (c->head == NULL) {
-        c->head = node; c->tail = node; *(void**)node = NULL; c->count = 1;
+        c->head = node; c->tail = node; *(void**)((uint8_t*)node + next_offset) = NULL; c->count = 1;
     } else {
-        *(void**)node = c->head; c->head = node; c->count++;
+        *(void**)((uint8_t*)node + next_offset) = c->head; c->head = node; c->count++;
     }
 }
 
@@ -167,7 +173,7 @@ static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
             trc_failfast_abort("freelist_next", class_idx, ss_base, ss_limit, next);
         }
         meta->freelist = next;
-        trc_push_front(out, p);
+        trc_push_front(out, p, class_idx);
         taken++;
     }
     // DEBUG REMOVED: refill_opt_dbg causes -26% regression (atomic CAS overhead)
@@ -175,9 +181,12 @@
 }
 
 // Carve a contiguous batch of size 'batch' from linear area, return as chain
+// Phase 7 header-aware carve: link chain using header-safe next location
+// class_idx is required to decide headerless (C7) vs headered (C0-C6)
 static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
                                         struct TinySlabMeta* meta,
                                         uint32_t batch,
+                                        int class_idx,
                                         TinyRefillChain* out) {
     if (!out || batch == 0) return 0;
     trc_init(out);
@@ -206,9 +215,18 @@
                        (void*)base, meta->carved, batch, (void*)cursor);
     }
 
+    // CRITICAL FIX (Phase 7): header-aware next pointer placement
+    // For header classes (C0-C6), the first byte at base is the 1-byte header.
+    // Store the SLL next pointer at base+1 to avoid clobbering the header.
+    // For C7 (headerless), store at base.
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    const size_t next_offset = (class_idx == 7) ? 0 : 1;
+#else
+    const size_t next_offset = 0;
+#endif
     for (uint32_t i = 1; i < batch; i++) {
         uint8_t* next = cursor + stride;
-        *(void**)cursor = (void*)next;
+        *(void**)(cursor + next_offset) = (void*)next;
         cursor = next;
     }
     void* tail = (void*)cursor;
diff --git a/core/tiny_region_id.h b/core/tiny_region_id.h
index a7cb35f6..af5737d9 100644
--- a/core/tiny_region_id.h
+++ b/core/tiny_region_id.h
@@ -64,6 +64,7 @@ static inline void* tiny_region_id_write_header(void* base, int class_idx) {
 // Returns: class_idx (0-7), or -1 if invalid
 static inline int tiny_region_id_read_header(void* ptr) {
     if (!ptr) return -1;
+    if ((uintptr_t)ptr < 4096) return -1;  // reject invalid tiny values
 
     uint8_t* header_ptr = (uint8_t*)ptr - 1;
 
@@ -129,6 +130,7 @@ static inline int tiny_region_id_has_header(void* ptr) {
 #if !HAKMEM_BUILD_RELEASE
     if (!ptr) return 0;
+    if ((uintptr_t)ptr < 4096) return 0;
 
     uint8_t* header_ptr = (uint8_t*)ptr - 1;
     uint8_t header = *header_ptr;
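
Appendix (review notes). The three sketches below illustrate the techniques this patch depends on. They are standalone, hedged examples with illustrative names and sizes, not code from the repository.

1) Header-aware free-list links. Classes C0-C6 keep a 1-byte class header at a block's base, so every intrusive singly linked cache touched by this patch (TLS SLL, fast cache, SFC, refill chains) must store its next pointer at base+1; only the headerless 1KB class C7 stores it at base+0. A minimal sketch of that discipline follows; the 0xA0|class header encoding is an assumption inferred from the classifier's 0xA0 comment.

#include <stdint.h>
#include <stdio.h>

#define HEADERLESS_CLASS 7  /* C7 (1KB) carries no header byte */

/* Next-pointer offset: skip the 1-byte header for headered classes. */
static inline size_t next_off(int class_idx) {
    return (class_idx == HEADERLESS_CLASS) ? 0 : 1;
}

/* Push a block (base pointer) onto an intrusive singly linked list. */
static void sll_push(void** head, void* base, int class_idx) {
    /* Unaligned pointer store at base+1, mirroring the patch's fast paths. */
    *(void**)((uint8_t*)base + next_off(class_idx)) = *head;
    *head = base;
}

/* Pop a block; the header byte at base is left untouched. */
static void* sll_pop(void** head, int class_idx) {
    void* base = *head;
    if (!base) return NULL;
    *head = *(void**)((uint8_t*)base + next_off(class_idx));
    return base;
}

int main(void) {
    /* Two fake 32-byte class-3 blocks; byte 0 holds the assumed header. */
    uint8_t a[32] = {0}, b[32] = {0};
    a[0] = b[0] = (uint8_t)(0xA0 | 3);

    void* head = NULL;
    sll_push(&head, a, 3);
    sll_push(&head, b, 3);
    void* got = sll_pop(&head, 3);
    /* Header survives the list round-trip because next lived at base+1. */
    printf("popped %p, header=0x%02X\n", got, ((uint8_t*)got)[0]);  /* 0xA3 */
    return 0;
}

The unaligned store at base+1 matches what the patch itself does in tiny_fast_push and friends; it is cheap on x86 and is exactly what lets a freed block keep its header byte intact across a cache round-trip.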
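
2) Classification guards. Before probing the header byte at ptr-1, the patch rejects addresses below 4096 (sentinel integers would fault or read the zero page) and page-aligned pointers (ptr-1 would cross into the previous, possibly unmapped page), and it accepts a matching header byte only if hak_super_lookup() also places the pointer inside a registered SuperSlab. A condensed sketch of that ordering; the enum, the magic-nibble test, and the stub lookup are assumptions for illustration.

#include <stdint.h>
#include <stddef.h>

struct SuperSlab;  /* opaque here; the real registry owns the layout */

/* Stub for this sketch only; the allocator provides the real lookup. */
static struct SuperSlab* hak_super_lookup(void* ptr) { (void)ptr; return NULL; }

typedef enum { K_UNKNOWN, K_TINY_HEADER } kind_t;

static kind_t classify_sketch(void* ptr) {
    /* Guard 1: small-integer pseudo-pointers; ptr-1 would touch page 0. */
    if ((uintptr_t)ptr < 4096) return K_UNKNOWN;
    /* Guard 2: on a page-aligned pointer, ptr-1 lies in the previous page. */
    if (((uintptr_t)ptr & 0xFFF) == 0) return K_UNKNOWN;

    uint8_t hdr = *((uint8_t*)ptr - 1);
    if ((hdr & 0xF0) == 0xA0) {  /* assumed 0xA0-style header magic */
        /* A lone magic byte can be random heap noise; require a registry
           hit before trusting the header path. */
        if (hak_super_lookup(ptr) != NULL) return K_TINY_HEADER;
    }
    return K_UNKNOWN;  /* callers fall through to Pool TLS / Registry checks */
}

int main(void) {
    static uint8_t buf[2] = { 0xA3, 0 };
    /* Magic matches, but the stub registry says "not ours", so the header
       path is refused: the same false-positive defense the patch adds. */
    return classify_sketch(&buf[1]) == K_TINY_HEADER ? 1 : 0;
}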
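
3) Weak link stubs. core/link_stubs.c keeps the benchmark link working when tiny_remote.o is dropped by LTO or section GC: a weak definition satisfies the linker but yields to any strong definition of the same symbol. A self-contained demonstration of the mechanism, with a hypothetical symbol name, for GCC/Clang on ELF targets.

#include <stdio.h>

/* Weak fallback: used only if no other object file defines hook(). */
__attribute__((weak)) void hook(void) { puts("weak stub"); }

int main(void) {
    hook();  /* prints "weak stub" unless a strong hook() is linked in */
    return 0;
}

/* Linking a second translation unit containing
       void hook(void) { puts("real implementation"); }
   replaces the weak stub with no code changes, which is how the real
   tiny_remote_track_* definitions override the stubs in link_stubs.o. */

One caveat worth keeping in mind: a weak stub whose prototype drifts out of sync with the strong definition will still link, so the forward-declared signatures in link_stubs.c must be kept identical to those in tiny_remote.c.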