Implement Phase 2: Headerless Allocator Support (Partial)
- Feature: Added HAKMEM_TINY_HEADERLESS toggle (A/B testing)
- Feature: Implemented Headerless layout logic (Offset=0)
- Refactor: Centralized layout definitions in tiny_layout_box.h
- Refactor: Abstracted pointer arithmetic in free path via ptr_conversion_box.h
- Verification: sh8bench passes in Headerless mode (No TLS_SLL_HDR_RESET)
- Known Issue: Regression in Phase 1 mode due to blind pointer conversion logic
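
A quick way to exercise the layout contract above is a tiny standalone check against `tiny_layout_box.h`. This is only a sketch: the `core/box/` include path, the test file itself, and the assumption that Phase 1 builds keep HAKMEM_TINY_HEADER_CLASSIDX enabled are not part of this commit.

```c
// check_layout.c - illustrative sketch, not committed code.
#include <assert.h>
#include <stdio.h>
#include "core/box/tiny_layout_box.h"   // tiny_user_offset(), TINY_NUM_CLASSES (path assumed)

int main(void) {
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        size_t off = tiny_user_offset(cls);
#if HAKMEM_TINY_HEADERLESS
        assert(off == 0);                                   // Offset=0: user == base for every class
#else
        assert(off == ((cls >= 1 && cls <= 6) ? 1 : 0));    // C0/C7 skip the 1-byte header
#endif
        printf("class %d: user offset = %zu\n", cls, off);
    }
    return 0;
}
```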

Makefile (+1 line)
@@ -214,6 +214,7 @@ LDFLAGS += -fprofile-use
endif

CFLAGS += $(EXTRA_CFLAGS)
CFLAGS_SHARED += $(EXTRA_CFLAGS)
LDFLAGS += $(EXTRA_LDFLAGS)

# Targets
@@ -72,7 +72,7 @@ void* bench_fast_alloc(size_t size) {
// Reason: P3 optimization skips header writes by default (class_map mode)
// But BenchFast REQUIRES headers for free routing (0xa0-0xa7 magic)
// Contract: BenchFast always writes headers, regardless of P3 optimization
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
*(uint8_t*)base = (uint8_t)(0xa0 | (class_idx & 0x0f)); // Direct header write
return (void*)((char*)base + 1); // Return USER pointer
#else
@@ -88,7 +88,7 @@ void* bench_fast_alloc(size_t size) {
void bench_fast_free(void* ptr) {
if (__builtin_expect(!ptr, 0)) return;

-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
// 1. Read class_idx from header (1 instruction, 2-3 cycles)
int class_idx = tiny_region_id_read_header(ptr);
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
@@ -156,7 +156,7 @@ int bench_fast_init(void) {
for (int i = 0; i < (int)capacity; i++) {
// Use normal allocator (hak_alloc_at) - recursion safe here
size_t size = g_tiny_class_sizes[cls];
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
// Adjust for header: if class size is N, we need N-1 bytes of user data
size = size - 1;
#endif
@@ -170,7 +170,7 @@ int bench_fast_init(void) {
return total;
}

-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
// Convert USER → BASE pointer
void* base = (void*)((char*)ptr - 1);

@@ -80,6 +80,7 @@ static inline int safe_header_probe(void* ptr) {
return -1;
}

#if HAKMEM_TINY_HEADER_CLASSIDX
// Safe to read header (same page guaranteed)
uint8_t* header_ptr = (uint8_t*)ptr - 1;
uint8_t header = *header_ptr;
@@ -98,6 +99,9 @@ static inline int safe_header_probe(void* ptr) {
}

return class_idx;
#else
return -1;
#endif
}

// ========== Registry Lookup ==========
@@ -200,6 +204,7 @@ ptr_classification_t classify_ptr(void* ptr) {
uint8_t magic = header & 0xF0;

// Fast path: Tiny allocation (magic = 0xa0) — guarded by Superslab registry
#if HAKMEM_TINY_HEADER_CLASSIDX
if (magic == HEADER_MAGIC) { // HEADER_MAGIC = 0xa0
int class_idx = header & HEADER_CLASS_MASK;
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
@@ -219,6 +224,7 @@ ptr_classification_t classify_ptr(void* ptr) {
}
}
}
#endif

#ifdef HAKMEM_POOL_TLS_PHASE1
// Fast path: Pool TLS allocation (magic = 0xb0)
@@ -127,7 +127,7 @@ static void hak_init_impl(void) {
#endif
HAKMEM_LOG("[Build] Flavor=%s Flags: HEADER_CLASSIDX=%d, AGGRESSIVE_INLINE=%d, POOL_TLS_PHASE1=%d, POOL_TLS_PREWARM=%d\n",
bf,
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
1,
#else
0,
@@ -226,7 +226,7 @@ void free(void* ptr) {
// WARNING: Bypasses ALL safety checks - benchmark only!
if (__builtin_expect(bench_fast_enabled(), 0)) {
// Trust header magic to identify Tiny allocations
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
uint8_t header = *((uint8_t*)ptr - 1);
if ((header & 0xf0) == 0xa0) { // Tiny header magic (0xa0-0xa7)
bench_fast_free(ptr);
@@ -44,8 +44,15 @@ static inline hak_user_ptr_t ptr_base_to_user(hak_base_ptr_t base, int class_idx
return HAK_USER_FROM_RAW(NULL);
}

#if HAKMEM_TINY_HEADERLESS
(void)class_idx;
// Headerless: user = base (identity)
void* raw_user = raw_base;
size_t offset = 0;
#else
size_t offset = tiny_user_offset(class_idx);
void* raw_user = (void*)((uint8_t*)raw_base + offset);
#endif

PTR_CONV_LOG("BASE→USER cls=%d base=%p → user=%p (+%zu)\n",
class_idx, raw_base, raw_user, offset);
@@ -66,14 +73,37 @@ static inline hak_base_ptr_t ptr_user_to_base(hak_user_ptr_t user, int class_idx
return HAK_BASE_FROM_RAW(NULL);
}

#if HAKMEM_TINY_HEADERLESS
(void)class_idx;
// Headerless: base = user (identity)
void* raw_base = raw_user;
size_t offset = 0;
#else
size_t offset = tiny_user_offset(class_idx);
void* raw_base = (void*)((uint8_t*)raw_user - offset);
#endif

PTR_CONV_LOG("USER→BASE cls=%d user=%p → base=%p (-%zu)\n",
class_idx, raw_user, raw_base, offset);
return HAK_BASE_FROM_RAW(raw_base);
}

/**
 * Convert USER pointer to BASE pointer WITHOUT knowing class_idx
 * Primary use: free() entry point where class is not yet known
 */
static inline void* ptr_user_to_base_blind(void* user_ptr) {
if (user_ptr == NULL) return NULL;

#if HAKMEM_TINY_HEADERLESS
// Headerless: base = user
return user_ptr;
#else
// Phase 1: All classes have 1 byte header -> base = user - 1
return (void*)((uint8_t*)user_ptr - 1);
#endif
}

/**
 * Convenience macros for cleaner call sites
 */
@@ -75,7 +75,7 @@ static inline void* tiny_cold_refill_and_alloc(int class_idx) {
// Success: return USER pointer
// NOTE: Header already written by unified_cache_refill()
// (Removed redundant tiny_region_id_write_header() - P2 fix)
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
return (void*)((char*)base + 1); // USER pointer
#else
return base;
@@ -126,7 +126,7 @@ static inline void* tiny_hot_alloc_fast(int class_idx) {
TINY_HOT_METRICS_HIT(class_idx);

// Write header + return USER pointer (no branch)
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
tiny_region_id_write_header(base, class_idx); // 1-byte header at BASE
return (void*)((char*)base + 1); // Return USER pointer (BASE+1)
#else
@@ -17,11 +17,32 @@
#include <stddef.h>
#include "../hakmem_tiny_config.h" // For g_tiny_class_sizes and TINY_NUM_CLASSES

// A/B Toggle: Headerless mode
// ENV: HAKMEM_TINY_HEADERLESS=1 to enable
// Default: 0 (Phase 1 compatible)
#ifndef HAKMEM_TINY_HEADERLESS
#define HAKMEM_TINY_HEADERLESS 0
#endif

// Define all class-specific layout parameters
// Current: Defined in g_tiny_class_sizes[8] in hakmem_tiny.c
// This file makes them accessible via a unified Box API

-// Header size is 1 byte when enabled
+// Header size
static inline size_t tiny_header_size(int class_idx) {
#if HAKMEM_TINY_HEADERLESS
(void)class_idx;
return 0;
#else
// Phase 1: 1 byte header if enabled
// C0 (8B): offset 0 (8B stride too small for header + 8B pointer - would overflow)
// C7 (2048B): offset 0 (overwrites header in freelist - largest class can tolerate)
// C1-C6: offset 1 (header preserved - user data is not disturbed)
return (0x7Eu >> class_idx) & 1u;
#endif
}

// Legacy macro for backward compatibility (Phase 1)
#define TINY_HEADER_SIZE 1

// Validation macros
@@ -38,7 +59,10 @@ static inline size_t tiny_class_stride(int class_idx) {
// Calculate user pointer offset from base pointer
// This logic centralizes the "User = Base + 1" vs "User = Base + 0" decision
static inline size_t tiny_user_offset(int class_idx) {
-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+(void)class_idx;
+return 0; // Headerless: user = base
+#elif HAKMEM_TINY_HEADER_CLASSIDX
// C0 (8B): offset 0 (8B stride too small for header + 8B pointer - would overflow)
// C7 (2048B): offset 0 (overwrites header in freelist - largest class can tolerate)
// C1-C6: offset 1 (header preserved - user data is not disturbed)
@@ -368,6 +368,7 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
// Header restoration using Header Box (C1-C6 only; C0/C7 skip)
// Safe mode (HAKMEM_TINY_SLL_SAFEHEADER=1): never overwrite header; reject on magic mismatch.
// Default mode: restore expected header.
#if !HAKMEM_TINY_HEADERLESS
if (tiny_class_preserves_header(class_idx)) {
static int g_sll_safehdr = -1;
static int g_sll_ring_en = -1; // optional ring trace for TLS-SLL anomalies
@@ -407,6 +408,7 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
__atomic_thread_fence(__ATOMIC_RELEASE);
}
}
#endif

tls_sll_debug_guard(class_idx, ptr, "push");

@@ -568,6 +570,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
tls_sll_debug_guard(class_idx, base, "pop");

// Header validation using Header Box (C1-C6 only; C0/C7 skip)
#if !HAKMEM_TINY_HEADERLESS
if (tiny_class_preserves_header(class_idx)) {
uint8_t got, expect;
PTR_TRACK_TLS_POP(raw_base, class_idx);
@@ -608,6 +611,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
#endif
}
}
#endif

// Read next via Box API.
void* raw_next;
@@ -126,7 +126,7 @@ __attribute__((always_inline))
static inline int free_tiny_fast(void* ptr) {
if (__builtin_expect(!ptr, 0)) return 0;

-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
// 1. Page-boundary guard:
// If ptr sits at the start of a page (offset==0), ptr-1 may land on another page or in unmapped memory.
// In that case, skip the header read and fall back to the normal free path.
@@ -5,6 +5,15 @@
#ifndef HAKMEM_BUILD_FLAGS_H
#define HAKMEM_BUILD_FLAGS_H

// ------------------------------------------------------------
// Phase 2: Headerless Mode Override
// ------------------------------------------------------------
// If Headerless is enabled, force HEADER_CLASSIDX to 0
#if defined(HAKMEM_TINY_HEADERLESS) && HAKMEM_TINY_HEADERLESS
#undef HAKMEM_TINY_HEADER_CLASSIDX
#define HAKMEM_TINY_HEADER_CLASSIDX 0
#endif

// ------------------------------------------------------------
// Release/debug detection
// ------------------------------------------------------------
@@ -131,11 +131,20 @@ static __thread unsigned char g_tls_bench_warm_done[4];
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);

// ========== HAK_RET_ALLOC: Single Definition Point ==========
-// Choose implementation based on HAKMEM_TINY_HEADER_CLASSIDX
-// - Phase 7 enabled: Write header and return user pointer
+// Choose implementation based on HAKMEM_TINY_HEADERLESS or HAKMEM_TINY_HEADER_CLASSIDX
+// - Headerless enabled: Return base directly (no header write)
+// - Phase 7 enabled: Write header and return user pointer (base+1)
// - Phase 7 disabled: Legacy behavior (stats + route + return)

-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+// Headerless: No header write, user = base
+// Ultra-fast inline macro (1-2 instructions)
+#define HAK_RET_ALLOC(cls, base_ptr) do { \
+/* No header write needed for Headerless mode */ \
+return (base_ptr); \
+} while(0)
+
+#elif HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions)
@@ -125,7 +125,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
// Derive class_idx from per-slab metadata instead of ss->size_class
int class_idx = -1;
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int slab_idx = slab_index_for(ss, base);
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
@@ -156,7 +156,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
// Detect cross-thread: cross-thread free MUST go via superslab path
// ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
-base = (void*)((uint8_t*)ptr - 1);
+base = ptr_user_to_base_blind(ptr);
slab_idx = slab_index_for(ss, base);
int ss_cap = ss_slabs_capacity(ss);
if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
@@ -168,7 +168,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
size_t blk = g_tiny_class_sizes[class_idx];
uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
// Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr
-uintptr_t delta = (uintptr_t)((uint8_t*)ptr - 1) - (uintptr_t)slab_base;
+uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base;
int cap_ok = (meta->capacity > 0) ? 1 : 0;
int align_ok = (delta % blk) == 0;
int range_ok = cap_ok && (delta / blk) < meta->capacity;
@@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
if (__builtin_expect(g_debug_fast0, 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
void* prev = meta->freelist;
tiny_next_write(class_idx, base, prev); // Box API: uses offset 1 for headers
meta->freelist = base;
@@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// Front-V2: try to return to TLS magazine first (A/B, default OFF)
// Phase 7-Step8: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
if (tiny_heap_v2_try_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
HAK_STAT_FREE(class_idx);
@@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {

if (g_fast_enable && g_fast_cap[class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
@@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
// TinyHotMag front push(8/16/32B, A/B)
if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
if (hotmag_push(class_idx, base)) {
tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
HAK_STAT_FREE(class_idx);
@@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
}
if (tls->count < tls->cap) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx);
tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
@@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
}
{
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
tiny_tls_list_guard_push(class_idx, tls, base);
tls_list_push(tls, base, class_idx);
}
@@ -332,7 +332,7 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
@@ -392,7 +392,7 @@ void hak_tiny_free(void* ptr) {
// Resolve class_idx from per-slab metadata instead of ss->size_class
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
@@ -470,7 +470,7 @@ void hak_tiny_free(void* ptr) {
if (g_use_superslab) {
fast_ss = hak_super_lookup(ptr);
if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(fast_ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
TinySlabMeta* m = &fast_ss->slabs[sidx];
@@ -494,7 +494,7 @@ void hak_tiny_free(void* ptr) {
int ss_cls = -1, ts_cls = -1;
SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(chk_ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
TinySlabMeta* m = &chk_ss->slabs[sidx];
@@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
}
if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
// Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-void* base2 = (void*)((uint8_t*)ptr - 1);
+void* base2 = ptr_user_to_base_blind(ptr);
// PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
int pushed = 0;
// Phase 7-Step5: Use config macro for dead code elimination in PGO mode
@@ -543,7 +543,7 @@ void hak_tiny_free(void* ptr) {
if (ss && ss->magic == SUPERSLAB_MAGIC) {
// Derive class from per-slab meta
int cls = -1;
-void* base = (void*)((uint8_t*)ptr - 1);
+void* base = ptr_user_to_base_blind(ptr);
int sidx = slab_index_for(ss, base);
if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
TinySlabMeta* m = &ss->slabs[sidx];
@@ -31,7 +31,7 @@

// Phase 7-Step6-Fix: Config wrapper functions moved to tiny_fastcache.c
// (Forward declarations are in tiny_front_config_box.h)
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
// Ring Cache and Unified Cache removed (A/B test: OFF is faster)
#endif
#include "box/front_metrics_box.h" // Phase 19-1: Frontend layer metrics
@@ -87,7 +87,10 @@ static inline __attribute__((always_inline)) void* tiny_next_load(const void* ba
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
size_t off = tiny_next_off(class_idx);

-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+// Headerless mode: never restore header
+(void)class_idx;
+#elif HAKMEM_TINY_HEADER_CLASSIDX
// P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
// ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
if (off != 0) {
@@ -6,6 +6,7 @@
#include <stdio.h>
#include <stdatomic.h>
#include <stdlib.h>
#include "hakmem_build_flags.h" // Ensure flags are overridden
#include "tiny_region_id.h" // For HEADER_MAGIC, HEADER_CLASS_MASK (Fix #6)
#include "ptr_track.h" // Pointer tracking for debugging header corruption
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write
@@ -251,7 +252,7 @@ static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta,
// DEBUG: Log header restoration for class 2
uint8_t before = *(uint8_t*)p;
PTR_TRACK_FREELIST_POP(p, class_idx);
-*(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+tiny_header_write_if_preserved(p, class_idx); // Box API
PTR_TRACK_HEADER_WRITE(p, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
static _Atomic uint64_t g_freelist_count_c2 = 0;
if (class_idx == 2) {
@@ -320,7 +321,7 @@ static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs,
for (uint32_t i = 0; i < batch; i++) {
uint8_t* block = cursor + (i * stride);
PTR_TRACK_CARVE((void*)block, class_idx);
-*block = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK);
+tiny_header_write_if_preserved((void*)block, class_idx); // Box API
PTR_TRACK_HEADER_WRITE((void*)block, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));

#if !HAKMEM_BUILD_RELEASE

docs/SEGV_INVESTIGATION.md (new file, +63 lines)
@@ -0,0 +1,63 @@
# SEGV Investigation Report (Phase 2 Implementation)

**Status**: Initial tests after completing the Phase 2 implementation show mixed results.

## 🚨 Current Status Summary

| Mode | Flag | sh8bench (target) | cfrac (regression) | larson (regression) |
|---|---|---|---|---|
| **Headerless** | `HAKMEM_TINY_HEADERLESS=1` | ✅ **PASS** (no TLS_SLL_HDR_RESET) | ❌ SEGV | not yet tested |
| **Phase 1 compatible** | `HAKMEM_TINY_HEADERLESS=0` | ❌ SEGV (regression) | not yet tested | not yet tested |

## 🔍 Root-Cause Analysis (Hypotheses)

### 1. Incorrect `ptr_user_to_base_blind` implementation (Phase 1 compatible mode)

The implementation in `core/box/ptr_conversion_box.h`:

```c
static inline void* ptr_user_to_base_blind(void* user_ptr) {
    if (user_ptr == NULL) return NULL;

#if HAKMEM_TINY_HEADERLESS
    // Headerless: base = user
    return user_ptr;
#else
    // Phase 1: All classes have 1 byte header -> base = user - 1
    // ⚠️ This is the bug!
    return (void*)((uint8_t*)user_ptr - 1);
#endif
}
```

**The problem**:
In Phase 1 (`HAKMEM_TINY_HEADERLESS=0`), `tiny_layout_box.h` defines:
- Class 1-6: Offset = 1
- **Class 0, 7: Offset = 0**

However, `ptr_user_to_base_blind` **unconditionally subtracts 1**, so Class 0 (8B) and Class 7 (2048B) pointers end up shifted by one byte. This is most likely what triggers the SEGV in workloads other than `sh8bench` (which uses Class 1) and under certain boundary conditions.
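
For contrast, a class-aware conversion can reuse `tiny_user_offset()` from `tiny_layout_box.h`. The helper below is only a sketch to illustrate the point; its name and the include path are assumptions, not code in this repository:

```c
#include <stdint.h>
#include "core/box/tiny_layout_box.h"   // tiny_user_offset() (path assumed)

// Phase 1: tiny_user_offset() returns 1 for C1-C6 but 0 for C0/C7,
// so a hard-coded "- 1" corrupts the base pointer exactly for classes 0 and 7.
static inline void* user_to_base_with_class(void* user_ptr, int class_idx) {
    return (void*)((uint8_t*)user_ptr - tiny_user_offset(class_idx));
}
```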

### 2. cfrac SEGV in Headerless mode

In Headerless mode the offset is 0 (identity), so the problem above does not occur.
Possible reasons why `cfrac` still crashes:
- Is `front_gate_classifier.c` trying to read a header that no longer exists?
- Is `free_tiny_fast` misjudging pointers in its header check?
- `tiny_region_id_read_header` is disabled, but do its callers fail to handle a -1 return?
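
If the header probe is the culprit, the guard likely has to be compiled out entirely in Headerless builds. A sketch of that shape (the helper name is hypothetical; the real `free_tiny_fast` may be structured differently):

```c
#include <stdint.h>

// Sketch: in Headerless builds there is no 1-byte header, so never dereference ptr-1;
// the caller must route through the class_map / SuperSlab lookup instead.
static inline int tiny_header_looks_valid(const void* ptr) {
#if HAKMEM_TINY_HEADERLESS
    (void)ptr;
    return 0;                               // no header exists
#elif HAKMEM_TINY_HEADER_CLASSIDX
    uint8_t header = *((const uint8_t*)ptr - 1);
    return (header & 0xf0) == 0xa0;         // Tiny header magic (0xa0-0xa7)
#else
    (void)ptr;
    return 0;
#endif
}
```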

## 🛠️ Fix Strategy

### Fixing Phase 1 compatible mode

`ptr_user_to_base_blind` cannot determine the exact base without `class_idx` (it has no way to tell whether the pointer belongs to Class 0/7).
Therefore, the `free()` path **must resolve class_idx from the SuperSlab, or determine the offset by some other means**.
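
A sketch of that direction, reusing helpers that already appear in this commit (`hak_super_lookup`, `slab_index_for`, `ss_slabs_capacity`, `tiny_user_offset`); the `class_idx` field on `TinySlabMeta` and the omitted includes are assumptions:

```c
// Illustrative sketch only - not part of this commit.
static inline void* user_to_base_via_superslab(void* user_ptr) {
    SuperSlab* ss = hak_super_lookup(user_ptr);
    if (!ss || ss->magic != SUPERSLAB_MAGIC) return NULL;    // not a Tiny pointer

    // The header offset is at most 1 byte and blocks are >= 8 bytes, so the
    // USER pointer lands in the same slab as its BASE pointer.
    int sidx = slab_index_for(ss, user_ptr);
    if (sidx < 0 || sidx >= ss_slabs_capacity(ss)) return NULL;

    TinySlabMeta* meta = &ss->slabs[sidx];
    int class_idx = meta->class_idx;                         // assumed field name
    return (void*)((uint8_t*)user_ptr - tiny_user_offset(class_idx));
}
```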

### Fixing Headerless mode

Locate the SEGV site in `cfrac`, then find and eliminate any remaining header accesses.

---

## Next Steps

1. Rework the `ptr_user_to_base_blind` logic (blind conversion may be inherently too dangerous).
2. Capture a backtrace for cfrac.
3. Apply the fix patch.