Implement Phase 2: Headerless Allocator Support (Partial)

- Feature: Added HAKMEM_TINY_HEADERLESS toggle (A/B testing)
- Feature: Implemented Headerless layout logic (Offset=0)
- Refactor: Centralized layout definitions in tiny_layout_box.h
- Refactor: Abstracted pointer arithmetic in free path via ptr_conversion_box.h
- Verification: sh8bench passes in Headerless mode (no TLS_SLL_HDR_RESET reported)
- Known Issue: Regression in Phase 1 mode caused by the blind pointer conversion logic (ptr_user_to_base_blind)
This commit is contained in:
Moe Charm (CI)
2025-12-03 12:11:27 +09:00
parent 2f09f3cba8
commit c2716f5c01
18 changed files with 183 additions and 33 deletions

View File

@ -125,7 +125,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
// Derive class_idx from per-slab metadata instead of ss->size_class
int class_idx = -1;
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int slab_idx = slab_index_for(ss, base);
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
@ -156,7 +156,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
// Detect cross-thread: cross-thread free MUST go via superslab path
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
base = (void*)((uint8_t*)ptr - 1);
base = ptr_user_to_base_blind(ptr);
slab_idx = slab_index_for(ss, base);
int ss_cap = ss_slabs_capacity(ss);
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
@ -168,7 +168,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
size_t blk = g_tiny_class_sizes[class_idx];
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
// Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr
uintptr_t delta = (uintptr_t)((uint8_t*)ptr - 1) - (uintptr_t)slab_base;
uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (__builtin_expect(g_debug_fast0, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
void* prev = meta->freelist;
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
meta->freelist = base;
@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
if (tiny_heap_v2_try_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
HAK_STAT_FREE(class_idx);
@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (g_fast_enable && g_fast_cap[class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// TinyHotMag front push (8/16/32B, A/B)
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
if (hotmag_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
HAK_STAT_FREE(class_idx);
@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
}
if (tls->count < tls->cap) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
}
{
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx);
}
@ -332,7 +332,7 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
@ -392,7 +392,7 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
@ -470,7 +470,7 @@ void hak_tiny_free(void* ptr) {
if (g_use_superslab) {
fast_ss = hak_super_lookup(ptr);
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(fast_ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
TinySlabMeta* m = &fast_ss->slabs[sidx];
@ -494,7 +494,7 @@ void hak_tiny_free(void* ptr) {
int ss_cls = -1, ts_cls = -1;
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(chk_ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
TinySlabMeta* m = &chk_ss->slabs[sidx];
@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
}
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
void* base2 = (void*)((uint8_t*)ptr - 1);
void* base2 = ptr_user_to_base_blind(ptr);
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
@ -543,7 +543,7 @@ void hak_tiny_free(void* ptr) {
if (ss && ss->magic == SUPERSLAB_MAGIC) {
// Derive class from per-slab meta
int cls = -1;
void* base = (void*)((uint8_t*)ptr - 1);
void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];