Implement Phase 2: Headerless Allocator Support (Partial)
- Feature: Added HAKMEM_TINY_HEADERLESS toggle (A/B testing)
- Feature: Implemented Headerless layout logic (Offset=0)
- Refactor: Centralized layout definitions in tiny_layout_box.h
- Refactor: Abstracted pointer arithmetic in free path via ptr_conversion_box.h
- Verification: sh8bench passes in Headerless mode (No TLS_SLL_HDR_RESET)
- Known Issue: Regression in Phase 1 mode due to blind pointer conversion logic
This commit is contained in:
@ -125,7 +125,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
|
||||
// Derive class_idx from per-slab metadata instead of ss->size_class
|
||||
int class_idx = -1;
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int slab_idx = slab_index_for(ss, base);
|
||||
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
|
||||
@ -156,7 +156,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
|
||||
// Detect cross-thread: cross-thread free MUST go via superslab path
|
||||
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
|
||||
base = (void*)((uint8_t*)ptr - 1);
|
||||
base = ptr_user_to_base_blind(ptr);
|
||||
slab_idx = slab_index_for(ss, base);
|
||||
int ss_cap = ss_slabs_capacity(ss);
|
||||
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
|
||||
@ -168,7 +168,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
size_t blk = g_tiny_class_sizes[class_idx];
|
||||
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
|
||||
// Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr
|
||||
uintptr_t delta = (uintptr_t)((uint8_t*)ptr - 1) - (uintptr_t)slab_base;
|
||||
uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base;
|
||||
int cap_ok = (meta->capacity > 0) ? 1 : 0;
|
||||
int align_ok = (delta % blk) == 0;
|
||||
int range_ok = cap_ok && (delta / blk) < meta->capacity;
|
||||
@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
if (__builtin_expect(g_debug_fast0, 0)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
void* prev = meta->freelist;
|
||||
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
|
||||
meta->freelist = base;
|
||||
@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
|
||||
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
|
||||
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
if (tiny_heap_v2_try_push(class_idx, base)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
|
||||
HAK_STAT_FREE(class_idx);
|
||||
@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
|
||||
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int pushed = 0;
|
||||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||||
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
|
||||
@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
// TinyHotMag front push(8/16/32B, A/B)
|
||||
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
if (hotmag_push(class_idx, base)) {
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
|
||||
HAK_STAT_FREE(class_idx);
|
||||
@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
}
|
||||
if (tls->count < tls->cap) {
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
tiny_tls_list_guard_push(class_idx, tls, base);
|
||||
tls_list_push(tls, base, class_idx);
|
||||
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
|
||||
@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
|
||||
}
|
||||
{
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
tiny_tls_list_guard_push(class_idx, tls, base);
|
||||
tls_list_push(tls, base, class_idx);
|
||||
}
|
||||
@ -332,7 +332,7 @@ void hak_tiny_free(void* ptr) {
|
||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
@ -392,7 +392,7 @@ void hak_tiny_free(void* ptr) {
|
||||
// Resolve class_idx from per-slab metadata instead of ss->size_class
|
||||
SuperSlab* ss = hak_super_lookup(ptr);
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
@ -470,7 +470,7 @@ void hak_tiny_free(void* ptr) {
|
||||
if (g_use_superslab) {
|
||||
fast_ss = hak_super_lookup(ptr);
|
||||
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int sidx = slab_index_for(fast_ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
|
||||
TinySlabMeta* m = &fast_ss->slabs[sidx];
|
||||
@ -494,7 +494,7 @@ void hak_tiny_free(void* ptr) {
|
||||
int ss_cls = -1, ts_cls = -1;
|
||||
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
|
||||
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int sidx = slab_index_for(chk_ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
|
||||
TinySlabMeta* m = &chk_ss->slabs[sidx];
|
||||
@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
|
||||
}
|
||||
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
|
||||
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
|
||||
void* base2 = (void*)((uint8_t*)ptr - 1);
|
||||
void* base2 = ptr_user_to_base_blind(ptr);
|
||||
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
|
||||
int pushed = 0;
|
||||
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
|
||||
@ -543,7 +543,7 @@ void hak_tiny_free(void* ptr) {
|
||||
if (ss && ss->magic == SUPERSLAB_MAGIC) {
|
||||
// Derive class from per-slab meta
|
||||
int cls = -1;
|
||||
void* base = (void*)((uint8_t*)ptr - 1);
|
||||
void* base = ptr_user_to_base_blind(ptr);
|
||||
int sidx = slab_index_for(ss, base);
|
||||
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
|
||||
TinySlabMeta* m = &ss->slabs[sidx];
|
||||
|
||||
Reference in New Issue
Block a user