diff --git a/Makefile b/Makefile
index e7791947..4b91bc7e 100644
--- a/Makefile
+++ b/Makefile
@@ -214,6 +214,7 @@ LDFLAGS += -fprofile-use
 endif
 
 CFLAGS += $(EXTRA_CFLAGS)
+CFLAGS_SHARED += $(EXTRA_CFLAGS)
 LDFLAGS += $(EXTRA_LDFLAGS)
 
 # Targets
diff --git a/core/box/bench_fast_box.c b/core/box/bench_fast_box.c
index cdc31d75..87c09515 100644
--- a/core/box/bench_fast_box.c
+++ b/core/box/bench_fast_box.c
@@ -72,7 +72,7 @@ void* bench_fast_alloc(size_t size) {
     // Reason: P3 optimization skips header writes by default (class_map mode)
     //         But BenchFast REQUIRES headers for free routing (0xa0-0xa7 magic)
     // Contract: BenchFast always writes headers, regardless of P3 optimization
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
     *(uint8_t*)base = (uint8_t)(0xa0 | (class_idx & 0x0f));  // Direct header write
     return (void*)((char*)base + 1);  // Return USER pointer
     #else
@@ -88,7 +88,7 @@ void* bench_fast_alloc(size_t size) {
 void bench_fast_free(void* ptr) {
     if (__builtin_expect(!ptr, 0)) return;
 
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
     // 1. Read class_idx from header (1 instruction, 2-3 cycles)
     int class_idx = tiny_region_id_read_header(ptr);
     if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
@@ -156,7 +156,7 @@ int bench_fast_init(void) {
     for (int i = 0; i < (int)capacity; i++) {
         // Use normal allocator (hak_alloc_at) - recursion safe here
         size_t size = g_tiny_class_sizes[cls];
-        #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+        #if HAKMEM_TINY_HEADER_CLASSIDX
         // Adjust for header: if class size is N, we need N-1 bytes of user data
         size = size - 1;
         #endif
@@ -170,7 +170,7 @@ int bench_fast_init(void) {
         return total;
     }
 
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
     // Convert USER → BASE pointer
     void* base = (void*)((char*)ptr - 1);
 
diff --git a/core/box/front_gate_classifier.c b/core/box/front_gate_classifier.c
index 3a3518a8..9a961f98 100644
--- a/core/box/front_gate_classifier.c
+++ b/core/box/front_gate_classifier.c
@@ -80,6 +80,7 @@ static inline int safe_header_probe(void* ptr) {
         return -1;
     }
 
+#if HAKMEM_TINY_HEADER_CLASSIDX
     // Safe to read header (same page guaranteed)
     uint8_t* header_ptr = (uint8_t*)ptr - 1;
     uint8_t header = *header_ptr;
@@ -98,6 +99,9 @@ static inline int safe_header_probe(void* ptr) {
     }
 
     return class_idx;
+#else
+    return -1;
+#endif
 }
 
 // ========== Registry Lookup ==========
@@ -200,6 +204,7 @@ ptr_classification_t classify_ptr(void* ptr) {
     uint8_t magic = header & 0xF0;
 
     // Fast path: Tiny allocation (magic = 0xa0) — guarded by Superslab registry
+#if HAKMEM_TINY_HEADER_CLASSIDX
     if (magic == HEADER_MAGIC) {  // HEADER_MAGIC = 0xa0
         int class_idx = header & HEADER_CLASS_MASK;
         if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
@@ -219,6 +224,7 @@ ptr_classification_t classify_ptr(void* ptr) {
             }
         }
     }
+#endif
 
 #ifdef HAKMEM_POOL_TLS_PHASE1
     // Fast path: Pool TLS allocation (magic = 0xb0)
diff --git a/core/box/hak_core_init.inc.h b/core/box/hak_core_init.inc.h
index 962cbc7f..3c1b8907 100644
--- a/core/box/hak_core_init.inc.h
+++ b/core/box/hak_core_init.inc.h
@@ -127,7 +127,7 @@ static void hak_init_impl(void) {
 #endif
     HAKMEM_LOG("[Build] Flavor=%s Flags: HEADER_CLASSIDX=%d, AGGRESSIVE_INLINE=%d, POOL_TLS_PHASE1=%d, POOL_TLS_PREWARM=%d\n",
                bf,
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
                1,
 #else
                0,
diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h
index 2c879526..11719980 100644
--- a/core/box/hak_wrappers.inc.h
+++ b/core/box/hak_wrappers.inc.h
@@ -226,7 +226,7 @@ void free(void* ptr) {
     // WARNING: Bypasses ALL safety checks - benchmark only!
     if (__builtin_expect(bench_fast_enabled(), 0)) {
         // Trust header magic to identify Tiny allocations
-        #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+        #if HAKMEM_TINY_HEADER_CLASSIDX
         uint8_t header = *((uint8_t*)ptr - 1);
         if ((header & 0xf0) == 0xa0) {  // Tiny header magic (0xa0-0xa7)
             bench_fast_free(ptr);
diff --git a/core/box/ptr_conversion_box.h b/core/box/ptr_conversion_box.h
index 1b48db78..ea0b9cfb 100644
--- a/core/box/ptr_conversion_box.h
+++ b/core/box/ptr_conversion_box.h
@@ -44,8 +44,15 @@ static inline hak_user_ptr_t ptr_base_to_user(hak_base_ptr_t base, int class_idx
         return HAK_USER_FROM_RAW(NULL);
     }
 
+#if HAKMEM_TINY_HEADERLESS
+    (void)class_idx;
+    // Headerless: user = base (identity)
+    void* raw_user = raw_base;
+    size_t offset = 0;
+#else
     size_t offset = tiny_user_offset(class_idx);
     void* raw_user = (void*)((uint8_t*)raw_base + offset);
+#endif
 
     PTR_CONV_LOG("BASE→USER cls=%d base=%p → user=%p (+%zu)\n",
                  class_idx, raw_base, raw_user, offset);
@@ -66,14 +73,37 @@ static inline hak_base_ptr_t ptr_user_to_base(hak_user_ptr_t user, int class_idx
         return HAK_BASE_FROM_RAW(NULL);
     }
 
+#if HAKMEM_TINY_HEADERLESS
+    (void)class_idx;
+    // Headerless: base = user (identity)
+    void* raw_base = raw_user;
+    size_t offset = 0;
+#else
     size_t offset = tiny_user_offset(class_idx);
     void* raw_base = (void*)((uint8_t*)raw_user - offset);
+#endif
 
     PTR_CONV_LOG("USER→BASE cls=%d user=%p → base=%p (-%zu)\n",
                  class_idx, raw_user, raw_base, offset);
 
     return HAK_BASE_FROM_RAW(raw_base);
 }
 
+/**
+ * Convert USER pointer to BASE pointer WITHOUT knowing class_idx
+ * Primary use: free() entry point where class is not yet known
+ */
+static inline void* ptr_user_to_base_blind(void* user_ptr) {
+    if (user_ptr == NULL) return NULL;
+
+#if HAKMEM_TINY_HEADERLESS
+    // Headerless: base = user
+    return user_ptr;
+#else
+    // Phase 1: All classes have 1 byte header -> base = user - 1
+    return (void*)((uint8_t*)user_ptr - 1);
+#endif
+}
+
 /**
  * Convenience macros for cleaner call sites
  */
diff --git a/core/box/tiny_front_cold_box.h b/core/box/tiny_front_cold_box.h
index b1de8596..1a8e9fdf 100644
--- a/core/box/tiny_front_cold_box.h
+++ b/core/box/tiny_front_cold_box.h
@@ -75,7 +75,7 @@ static inline void* tiny_cold_refill_and_alloc(int class_idx) {
     // Success: return USER pointer
     // NOTE: Header already written by unified_cache_refill()
     //       (Removed redundant tiny_region_id_write_header() - P2 fix)
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
     return (void*)((char*)base + 1);  // USER pointer
     #else
     return base;
diff --git a/core/box/tiny_front_hot_box.h b/core/box/tiny_front_hot_box.h
index 84b1984e..baeac0e7 100644
--- a/core/box/tiny_front_hot_box.h
+++ b/core/box/tiny_front_hot_box.h
@@ -126,7 +126,7 @@ static inline void* tiny_hot_alloc_fast(int class_idx) {
     TINY_HOT_METRICS_HIT(class_idx);
 
     // Write header + return USER pointer (no branch)
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
     tiny_region_id_write_header(base, class_idx);  // 1-byte header at BASE
     return (void*)((char*)base + 1);  // Return USER pointer (BASE+1)
     #else
diff --git a/core/box/tiny_layout_box.h b/core/box/tiny_layout_box.h
index 53b691e5..2b741515 100644
--- a/core/box/tiny_layout_box.h
+++ b/core/box/tiny_layout_box.h
@@ -17,11 +17,32 @@
 #include <stddef.h>
 #include "../hakmem_tiny_config.h"  // For g_tiny_class_sizes and TINY_NUM_CLASSES
 
+// A/B Toggle: Headerless mode
+// ENV: HAKMEM_TINY_HEADERLESS=1 to enable
+// Default: 0 (Phase 1 compatible)
+#ifndef HAKMEM_TINY_HEADERLESS
+  #define HAKMEM_TINY_HEADERLESS 0
+#endif
+
 // Define all class-specific layout parameters
 // Current: Defined in g_tiny_class_sizes[8] in hakmem_tiny.c
 // This file makes them accessible via a unified Box API
 
-// Header size is 1 byte when enabled
+// Header size
+static inline size_t tiny_header_size(int class_idx) {
+#if HAKMEM_TINY_HEADERLESS
+    (void)class_idx;
+    return 0;
+#else
+    // Phase 1: 1 byte header if enabled
+    // C0 (8B):    offset 0 (8B stride too small for header + 8B pointer - would overflow)
+    // C7 (2048B): offset 0 (overwrites header in freelist - largest class can tolerate)
+    // C1-C6:      offset 1 (header preserved - user data is not disturbed)
+    return (0x7Eu >> class_idx) & 1u;
+#endif
+}
+
+// Legacy macro for backward compatibility (Phase 1)
 #define TINY_HEADER_SIZE 1
 
 // Validation macros
@@ -38,7 +59,10 @@ static inline size_t tiny_class_stride(int class_idx) {
 // Calculate user pointer offset from base pointer
 // This logic centralizes the "User = Base + 1" vs "User = Base + 0" decision
 static inline size_t tiny_user_offset(int class_idx) {
-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+    (void)class_idx;
+    return 0;  // Headerless: user = base
+#elif HAKMEM_TINY_HEADER_CLASSIDX
     // C0 (8B):    offset 0 (8B stride too small for header + 8B pointer - would overflow)
     // C7 (2048B): offset 0 (overwrites header in freelist - largest class can tolerate)
     // C1-C6:      offset 1 (header preserved - user data is not disturbed)
diff --git a/core/box/tls_sll_box.h b/core/box/tls_sll_box.h
index 021b6869..b1d81fbf 100644
--- a/core/box/tls_sll_box.h
+++ b/core/box/tls_sll_box.h
@@ -368,6 +368,7 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
     // Header restoration using Header Box (C1-C6 only; C0/C7 skip)
     // Safe mode (HAKMEM_TINY_SLL_SAFEHEADER=1): never overwrite header; reject on magic mismatch.
     // Default mode: restore expected header.
+#if !HAKMEM_TINY_HEADERLESS
     if (tiny_class_preserves_header(class_idx)) {
         static int g_sll_safehdr = -1;
         static int g_sll_ring_en = -1;  // optional ring trace for TLS-SLL anomalies
@@ -407,6 +408,7 @@ static inline bool tls_sll_push_impl(int class_idx, hak_base_ptr_t ptr, uint32_t
             __atomic_thread_fence(__ATOMIC_RELEASE);
         }
     }
+#endif
 
     tls_sll_debug_guard(class_idx, ptr, "push");
 
@@ -568,6 +570,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
     tls_sll_debug_guard(class_idx, base, "pop");
 
     // Header validation using Header Box (C1-C6 only; C0/C7 skip)
+#if !HAKMEM_TINY_HEADERLESS
     if (tiny_class_preserves_header(class_idx)) {
         uint8_t got, expect;
         PTR_TRACK_TLS_POP(raw_base, class_idx);
@@ -608,6 +611,7 @@ static inline bool tls_sll_pop_impl(int class_idx, hak_base_ptr_t* out, const ch
 #endif
         }
     }
+#endif
 
     // Read next via Box API.
     void* raw_next;
diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h
index e956be47..f0c1f9ae 100644
--- a/core/front/malloc_tiny_fast.h
+++ b/core/front/malloc_tiny_fast.h
@@ -126,7 +126,7 @@ __attribute__((always_inline))
 static inline int free_tiny_fast(void* ptr) {
     if (__builtin_expect(!ptr, 0)) return 0;
 
-    #ifdef HAKMEM_TINY_HEADER_CLASSIDX
+    #if HAKMEM_TINY_HEADER_CLASSIDX
    // 1. Page-boundary guard:
    //    If ptr sits at the start of a page (offset==0), ptr-1 may be on another page or in unmapped memory.
    //    In that case, skip the header read and fall back to the normal free path.
diff --git a/core/hakmem_build_flags.h b/core/hakmem_build_flags.h
index 4b0b1f83..f38c8fa0 100644
--- a/core/hakmem_build_flags.h
+++ b/core/hakmem_build_flags.h
@@ -5,6 +5,15 @@
 #ifndef HAKMEM_BUILD_FLAGS_H
 #define HAKMEM_BUILD_FLAGS_H
 
+// ------------------------------------------------------------
+// Phase 2: Headerless Mode Override
+// ------------------------------------------------------------
+// If Headerless is enabled, force HEADER_CLASSIDX to 0
+#if defined(HAKMEM_TINY_HEADERLESS) && HAKMEM_TINY_HEADERLESS
+  #undef HAKMEM_TINY_HEADER_CLASSIDX
+  #define HAKMEM_TINY_HEADER_CLASSIDX 0
+#endif
+
 // ------------------------------------------------------------
 // Release/debug detection
 // ------------------------------------------------------------
diff --git a/core/hakmem_tiny_config_box.inc b/core/hakmem_tiny_config_box.inc
index be02db6b..6728075f 100644
--- a/core/hakmem_tiny_config_box.inc
+++ b/core/hakmem_tiny_config_box.inc
@@ -131,11 +131,20 @@ static __thread unsigned char g_tls_bench_warm_done[4];
 static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);
 
 // ========== HAK_RET_ALLOC: Single Definition Point ==========
-// Choose implementation based on HAKMEM_TINY_HEADER_CLASSIDX
-// - Phase 7 enabled: Write header and return user pointer
+// Choose implementation based on HAKMEM_TINY_HEADERLESS or HAKMEM_TINY_HEADER_CLASSIDX
+// - Headerless enabled: Return base directly (no header write)
+// - Phase 7 enabled: Write header and return user pointer (base+1)
 // - Phase 7 disabled: Legacy behavior (stats + route + return)
-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+  // Headerless: No header write, user = base
+  // Ultra-fast inline macro (1-2 instructions)
+  #define HAK_RET_ALLOC(cls, base_ptr) do { \
+      /* No header write needed for Headerless mode */ \
+      return (base_ptr); \
+  } while(0)
+
+#elif HAKMEM_TINY_HEADER_CLASSIDX
   #if HAKMEM_BUILD_RELEASE
   // Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
   // Ultra-fast inline macro (3-4 instructions)
diff --git a/core/hakmem_tiny_free.inc b/core/hakmem_tiny_free.inc
index 0d21c943..12a00102 100644
--- a/core/hakmem_tiny_free.inc
+++ b/core/hakmem_tiny_free.inc
@@ -125,7 +125,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
         if (!ss || ss->magic != SUPERSLAB_MAGIC) return;
         // Derive class_idx from per-slab metadata instead of ss->size_class
         int class_idx = -1;
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         int slab_idx = slab_index_for(ss, base);
         if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
             TinySlabMeta* meta_probe = &ss->slabs[slab_idx];
@@ -156,7 +156,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     tiny_debug_ring_record(TINY_RING_EVENT_FREE_ENTER, (uint16_t)class_idx, ptr, 0);
     // Detect cross-thread: cross-thread free MUST go via superslab path
     // ✅ FIX: Phase E1-CORRECT - Convert USER → BASE before slab index calculation
-    base = (void*)((uint8_t*)ptr - 1);
+    base = ptr_user_to_base_blind(ptr);
     slab_idx = slab_index_for(ss, base);
     int ss_cap = ss_slabs_capacity(ss);
     if (__builtin_expect(slab_idx < 0 || slab_idx >= ss_cap, 0)) {
@@ -168,7 +168,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
         size_t blk = g_tiny_class_sizes[class_idx];
         uint8_t* slab_base = tiny_slab_base_for(ss, slab_idx);
         // Phase E1-CORRECT: All classes have headers, validate block base (ptr-1) not user ptr
-        uintptr_t delta = (uintptr_t)((uint8_t*)ptr - 1) - (uintptr_t)slab_base;
+        uintptr_t delta = (uintptr_t)ptr_user_to_base_blind(ptr) - (uintptr_t)slab_base;
         int cap_ok = (meta->capacity > 0) ? 1 : 0;
         int align_ok = (delta % blk) == 0;
         int range_ok = cap_ok && (delta / blk) < meta->capacity;
@@ -216,7 +216,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     if (__builtin_expect(g_debug_fast0, 0)) {
         tiny_debug_ring_record(TINY_RING_EVENT_FRONT_BYPASS, (uint16_t)class_idx, ptr, (uintptr_t)slab_idx);
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         void* prev = meta->freelist;
         tiny_next_write(class_idx, base, prev);  // Box API: uses offset 1 for headers
         meta->freelist = base;
@@ -234,7 +234,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     // Front-V2: try to return to TLS magazine first (A/B, default OFF)
     // Phase 7-Step8: Use config macro for dead code elimination in PGO mode
     if (__builtin_expect(TINY_FRONT_HEAP_V2_ENABLED && class_idx <= 3, 0)) {
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         if (tiny_heap_v2_try_push(class_idx, base)) {
             tiny_debug_ring_record(TINY_RING_EVENT_FREE_FAST, (uint16_t)class_idx, ptr, slab_idx);
             HAK_STAT_FREE(class_idx);
@@ -244,7 +244,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
 
     if (g_fast_enable && g_fast_cap[class_idx] != 0) {
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         int pushed = 0;
         // Phase 7-Step5: Use config macro for dead code elimination in PGO mode
         if (__builtin_expect(TINY_FRONT_FASTCACHE_ENABLED && class_idx <= 3, 1)) {
@@ -268,7 +268,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     // TinyHotMag front push(8/16/32B, A/B)
     if (__builtin_expect(g_hotmag_enable && class_idx <= 2, 1)) {
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         if (hotmag_push(class_idx, base)) {
             tiny_debug_ring_record(TINY_RING_EVENT_FREE_RETURN_MAG, (uint16_t)class_idx, ptr, 1);
             HAK_STAT_FREE(class_idx);
@@ -277,7 +277,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     }
     if (tls->count < tls->cap) {
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         tiny_tls_list_guard_push(class_idx, tls, base);
         tls_list_push(tls, base, class_idx);
         tiny_debug_ring_record(TINY_RING_EVENT_FREE_LOCAL, (uint16_t)class_idx, ptr, 0);
@@ -290,7 +290,7 @@ void hak_tiny_free_with_slab(void* ptr, TinySlab* slab) {
     }
     {
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         tiny_tls_list_guard_push(class_idx, tls, base);
         tls_list_push(tls, base, class_idx);
     }
@@ -332,7 +332,7 @@ void hak_tiny_free(void* ptr) {
         // Resolve class_idx from per-slab metadata instead of ss->size_class
         SuperSlab* ss = hak_super_lookup(ptr);
         if (ss && ss->magic == SUPERSLAB_MAGIC) {
-            void* base = (void*)((uint8_t*)ptr - 1);
+            void* base = ptr_user_to_base_blind(ptr);
             int sidx = slab_index_for(ss, base);
             if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
                 TinySlabMeta* m = &ss->slabs[sidx];
@@ -392,7 +392,7 @@ void hak_tiny_free(void* ptr) {
         // Resolve class_idx from per-slab metadata instead of ss->size_class
         SuperSlab* ss = hak_super_lookup(ptr);
         if (ss && ss->magic == SUPERSLAB_MAGIC) {
-            void* base = (void*)((uint8_t*)ptr - 1);
+            void* base = ptr_user_to_base_blind(ptr);
             int sidx = slab_index_for(ss, base);
             if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
                 TinySlabMeta* m = &ss->slabs[sidx];
@@ -470,7 +470,7 @@ void hak_tiny_free(void* ptr) {
     if (g_use_superslab) {
         fast_ss = hak_super_lookup(ptr);
         if (fast_ss && fast_ss->magic == SUPERSLAB_MAGIC) {
-            void* base = (void*)((uint8_t*)ptr - 1);
+            void* base = ptr_user_to_base_blind(ptr);
             int sidx = slab_index_for(fast_ss, base);
             if (sidx >= 0 && sidx < ss_slabs_capacity(fast_ss)) {
                 TinySlabMeta* m = &fast_ss->slabs[sidx];
@@ -494,7 +494,7 @@ void hak_tiny_free(void* ptr) {
         int ss_cls = -1, ts_cls = -1;
         SuperSlab* chk_ss = fast_ss ? fast_ss : (g_use_superslab ? hak_super_lookup(ptr) : NULL);
         if (chk_ss && chk_ss->magic == SUPERSLAB_MAGIC) {
-            void* base = (void*)((uint8_t*)ptr - 1);
+            void* base = ptr_user_to_base_blind(ptr);
             int sidx = slab_index_for(chk_ss, base);
             if (sidx >= 0 && sidx < ss_slabs_capacity(chk_ss)) {
                 TinySlabMeta* m = &chk_ss->slabs[sidx];
@@ -516,7 +516,7 @@ void hak_tiny_free(void* ptr) {
     }
     if (fast_class_idx >= 0 && g_fast_enable && g_fast_cap[fast_class_idx] != 0) {
         // Phase E1-CORRECT: ALL classes (C0-C7) have 1-byte header
-        void* base2 = (void*)((uint8_t*)ptr - 1);
+        void* base2 = ptr_user_to_base_blind(ptr);
         // PRIORITY 1: Try FastCache first (bypasses SLL when Front-Direct)
         int pushed = 0;
         // Phase 7-Step5: Use config macro for dead code elimination in PGO mode
@@ -543,7 +543,7 @@ void hak_tiny_free(void* ptr) {
     if (ss && ss->magic == SUPERSLAB_MAGIC) {
         // Derive class from per-slab meta
         int cls = -1;
-        void* base = (void*)((uint8_t*)ptr - 1);
+        void* base = ptr_user_to_base_blind(ptr);
         int sidx = slab_index_for(ss, base);
         if (sidx >= 0 && sidx < ss_slabs_capacity(ss)) {
             TinySlabMeta* m = &ss->slabs[sidx];
diff --git a/core/tiny_alloc_fast.inc.h b/core/tiny_alloc_fast.inc.h
index 0b7aecc3..6826dc19 100644
--- a/core/tiny_alloc_fast.inc.h
+++ b/core/tiny_alloc_fast.inc.h
@@ -31,7 +31,7 @@
 // Phase 7-Step6-Fix: Config wrapper functions moved to tiny_fastcache.c
 // (Forward declarations are in tiny_front_config_box.h)
 
-#ifdef HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADER_CLASSIDX
 // Ring Cache and Unified Cache removed (A/B test: OFF is faster)
 #endif
 #include "box/front_metrics_box.h"  // Phase 19-1: Frontend layer metrics
diff --git a/core/tiny_nextptr.h b/core/tiny_nextptr.h
index 0ea57aa2..0ef29253 100644
--- a/core/tiny_nextptr.h
+++ b/core/tiny_nextptr.h
@@ -87,7 +87,10 @@ static inline __attribute__((always_inline)) void* tiny_next_load(const void* ba
 static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
     size_t off = tiny_next_off(class_idx);
-#if HAKMEM_TINY_HEADER_CLASSIDX
+#if HAKMEM_TINY_HEADERLESS
+    // Headerless mode: never restore header
+    (void)class_idx;
+#elif HAKMEM_TINY_HEADER_CLASSIDX
     // P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
     // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
     if (off != 0) {
diff --git a/core/tiny_refill_opt.h b/core/tiny_refill_opt.h
index fc2b258c..3c9c5e70 100644
--- a/core/tiny_refill_opt.h
+++ b/core/tiny_refill_opt.h
@@ -6,6 +6,7 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
+#include "hakmem_build_flags.h"  // Ensure flags are overridden
 #include "tiny_region_id.h"  // For HEADER_MAGIC, HEADER_CLASS_MASK (Fix #6)
 #include "ptr_track.h"  // Pointer tracking for debugging header corruption
#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write @@ -251,7 +252,7 @@ static inline uint32_t trc_pop_from_freelist(struct TinySlabMeta* meta, // DEBUG: Log header restoration for class 2 uint8_t before = *(uint8_t*)p; PTR_TRACK_FREELIST_POP(p, class_idx); - *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); + tiny_header_write_if_preserved(p, class_idx); // Box API PTR_TRACK_HEADER_WRITE(p, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK)); static _Atomic uint64_t g_freelist_count_c2 = 0; if (class_idx == 2) { @@ -320,7 +321,7 @@ static inline uint32_t trc_linear_carve(uint8_t* base, size_t bs, for (uint32_t i = 0; i < batch; i++) { uint8_t* block = cursor + (i * stride); PTR_TRACK_CARVE((void*)block, class_idx); - *block = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); + tiny_header_write_if_preserved((void*)block, class_idx); // Box API PTR_TRACK_HEADER_WRITE((void*)block, HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK)); #if !HAKMEM_BUILD_RELEASE diff --git a/docs/SEGV_INVESTIGATION.md b/docs/SEGV_INVESTIGATION.md new file mode 100644 index 00000000..7b438980 --- /dev/null +++ b/docs/SEGV_INVESTIGATION.md @@ -0,0 +1,63 @@ +# SEGV 調査報告書 (Phase 2 Implementaton) + +**ステータス**: Phase 2 実装完了後の初期テストで混合的な結果が発生。 + +## 🚨 現状サマリー + +| モード | フラグ | sh8bench (目標) | cfrac (回帰) | larson (回帰) | +|---|---|---|---|---| +| **Headerless** | `HAKMEM_TINY_HEADERLESS=1` | ✅ **PASS** (Resetなし) | ❌ SEGV | 未確認 | +| **Phase 1 互換** | `HAKMEM_TINY_HEADERLESS=0` | ❌ SEGV (Regression) | 未確認 | 未確認 | + +## 🔍 原因分析 (仮説) + +### 1. `ptr_user_to_base_blind` の実装ミス (Phase 1 互換モード) + +`core/box/ptr_conversion_box.h` における実装を確認: + +```c +static inline void* ptr_user_to_base_blind(void* user_ptr) { + if (user_ptr == NULL) return NULL; + +#if HAKMEM_TINY_HEADERLESS + // Headerless: base = user + return user_ptr; +#else + // Phase 1: All classes have 1 byte header -> base = user - 1 + // ⚠️ ここが間違い! + return (void*)((uint8_t*)user_ptr - 1); +#endif +} +``` + +**問題点**: +Phase 1 (`HAKMEM_TINY_HEADERLESS=0`) において、`tiny_layout_box.h` の定義では: +- Class 1-6: Offset = 1 +- **Class 0, 7: Offset = 0** + +しかし、`ptr_user_to_base_blind` は**無条件に -1 している**ため、Class 0 (8B) と Class 7 (2048B) のポインタが 1バイトずれます。これが `sh8bench` (Class 1使用) 以外のワークロードや、特定の境界条件で SEGV を引き起こしている可能性が高いです。 + +### 2. Headerless モードでの cfrac SEGV + +Headerless モードでは Offset=0 (Identity) なので上記の問題は発生しません。 +しかし `cfrac` が落ちる理由として: +- `front_gate_classifier.c` がヘッダー(存在しない)を読もうとしている? +- `free_tiny_fast` がヘッダーチェックで誤判定している? +- `tiny_region_id_read_header` が無効化されているが、呼び出し元が -1 を期待していない? + +## 🛠️ 修正方針 + +### Phase 1 互換モードの修正 +`ptr_user_to_base_blind` は `class_idx` なしでは正確な Base を特定できません(Class 0/7 かどうか判別できないため)。 +したがって、`free()` パスでは **SuperSlab から class_idx を引くか、何らかの方法で Offset を特定する必要があります**。 + +### Headerless モードの修正 +`cfrac` の SEGV 箇所を特定し、ヘッダーアクセスが残存している箇所を特定・排除します。 + +--- + +## 次のステップ + +1. `ptr_user_to_base_blind` のロジック見直し(blind変換は危険すぎる可能性) +2. cfrac のバックトレース取得 +3. 修正パッチの適用