Phase 13 v1: Header Write Elimination (C7 preserve header)
- Verdict: NEUTRAL (+0.78%)
- Implementation: HAKMEM_TINY_C7_PRESERVE_HEADER ENV gate (default OFF)
- Makes C7 nextptr offset conditional (0→1 when enabled)
- 4-point matrix A/B test results:
* Case A (baseline): 51.49M ops/s
* Case B (WRITE_ONCE=1): 52.07M ops/s (+1.13%)
* Case C (C7_PRESERVE=1): 51.36M ops/s (-0.26%)
* Case D (both): 51.89M ops/s (+0.78% NEUTRAL)
- Action: Freeze as research box (default OFF, manual opt-in)
Phase 5 E5-2: Header Write-Once retest (promotion test)
- Verdict: NEUTRAL (+0.54%)
- Motivation: Phase 13 Case B showed +1.13%, re-tested with dedicated 20-run
- Results (20-run):
* Case A (baseline): 51.10M ops/s
* Case B (WRITE_ONCE=1): 51.37M ops/s (+0.54%)
- Previous test: +0.45% (consistent with NEUTRAL)
- Action: Keep as research box (default OFF, manual opt-in)
Key findings:
- Header write tax optimization shows consistent NEUTRAL results
- Neither Phase 13 v1 nor E5-2 reaches GO threshold (+1.0%)
- Both implemented as reversible ENV gates for future research
Files changed:
- New: core/box/tiny_c7_preserve_header_env_box.{c,h}
- Modified: core/box/tiny_layout_box.h (C7 offset conditional)
- Modified: core/tiny_nextptr.h, core/box/tiny_header_box.h (comments)
- Modified: core/bench_profile.h (refresh sync)
- Modified: Makefile (add new .o files)
- Modified: scripts/run_mixed_10_cleanenv.sh (add C7_PRESERVE ENV)
- Docs: PHASE13_*, PHASE5_E5_2_HEADER_WRITE_ONCE_* (design/results)
Next: Phase 14 (Pointer-chase reduction, tcache-style intrusive LIFO)
🤖 Generated with Claude Code
Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
//
// Finalized Phase E1-CORRECT spec (including physical constraints):
// P0.1 updated: C0 uses offset 0, C1-C6 use offset 1 (header preserved)
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1)
//
// When HAKMEM_TINY_HEADER_CLASSIDX != 0:
//
// Class 0:
//   [1B header][7B payload] (total 8B stride)
//   -> an 8B stride cannot hold a 1B header plus an 8B next pointer (1B overflow)
//   -> next is stored at base+0 (overwrites the header)
//   -> next_off = 0
//
// Class 1-6:
//   [1B header][payload >= 15B] (stride >= 16B)
//   -> header is preserved; next is stored immediately after it at base+1
//   -> next_off = 1
//
// Class 7:
//   [1B header][payload 2047B]
//   -> next_off = 0 (default: header is overwritten)
//   -> next_off = 1 (Phase 13 v1: HAKMEM_TINY_C7_PRESERVE_HEADER=1)
//
// When HAKMEM_TINY_HEADER_CLASSIDX == 0:
//
// No class has a header -> next_off = 0
//
// This header provides the spec above as the single source of truth.
// Every tiny freelist / TLS / fast-cache / refill / SLL must go through
// tiny_next_off / tiny_next_load / tiny_next_store.
// Direct *(void**) access and local offset branching are forbidden.
#ifndef TINY_NEXTPTR_H
#define TINY_NEXTPTR_H

#include <stdint.h>
#include <string.h>
#include <stdlib.h> // P2.3: for getenv()
#include "hakmem_build_flags.h"
#include "tiny_region_id.h" // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
#include "hakmem_super_registry.h" // hak_super_lookup
#include "superslab/superslab_inline.h" // slab_index_for
#include <stdio.h>
#include <stdatomic.h>
#include <dlfcn.h>
#include <execinfo.h> // backtrace for rare misalign diagnostics
#include "box/tiny_layout_box.h"
#include "box/tiny_header_box.h"

// Per-thread trace context injected by PTR_NEXT_WRITE macro (for triage).
// Carries the writer's call-site tag, file:line, and up to three return
// addresses; tiny_next_store_log reads and immediately clears these so a
// stale context is never attributed to a later store.
static __thread const char* g_tiny_next_tag __attribute__((unused)) = NULL;
static __thread const char* g_tiny_next_file __attribute__((unused)) = NULL;
static __thread int g_tiny_next_line __attribute__((unused)) = 0;
static __thread void* g_tiny_next_ra0 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra1 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra2 __attribute__((unused)) = NULL;
// Compute the freelist next-pointer offset within a block for a class.
// P0.1: C0 uses offset 0, C1-C6 use offset 1 (header preserved).
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1).
// Rationale for C0: an 8B stride cannot fit [1B header][8B next pointer].
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
    // The layout box (box/tiny_layout_box.h) is the single source of truth.
    size_t off = tiny_nextptr_offset(class_idx);
    return off;
}
#if !HAKMEM_BUILD_RELEASE
// Optional: log next-pointer writes for triage (env: HAKMEM_TINY_SLL_HEADLOG=1)
//
// Debug-build implementation. Emits up to 256 one-line records to stderr
// describing each freelist next-pointer store (class, base, next, offset,
// superslab metadata cross-check, caller, TLS tag/site), plus a short
// backtrace for the first 24 shots and for clearly misaligned bases.
//
// Env knobs (read once, on first call):
//   HAKMEM_TINY_SLL_HEADLOG=0  -> disable logging (default is ON for triage)
//   HAKMEM_TINY_SLL_NEXTCLS=N  -> only log stores for class N
//   HAKMEM_TINY_SLL_NEXTTAG=S  -> only log when the TLS tag contains substring S
//
// NOTE(review): the env-check latch and the static filters are plain (non-
// atomic) statics; a first-call race between threads is possible but the
// initialization is idempotent, so it is presumed benign — confirm if this
// path can run before threads are joined on a barrier.
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    static int g_nextlog_en = 1; // default ON for triage; disable with HAKMEM_TINY_SLL_HEADLOG=0
    static int g_nextlog_env_checked = 0;
    static int g_nextlog_cls = -2; // -1 = no filter; >=0 = only that class
    static const char* g_nextlog_tag_filter = NULL; // substring match; NULL = no filter
    if (!g_nextlog_env_checked) {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        if (e && *e == '0') {
            g_nextlog_en = 0;
        }
        const char* c = getenv("HAKMEM_TINY_SLL_NEXTCLS");
        if (c && *c) {
            g_nextlog_cls = atoi(c);
        } else {
            g_nextlog_cls = -1;
        }
        g_nextlog_tag_filter = getenv("HAKMEM_TINY_SLL_NEXTTAG");
        g_nextlog_env_checked = 1;
    }
    if (!__builtin_expect(g_nextlog_en, 0)) return;
    if (g_nextlog_cls >= 0 && class_idx != g_nextlog_cls) return;

    // Pull tag/callsite from TLS and clear immediately to avoid stale reuse
    const char* tag = g_tiny_next_tag;
    const char* file = g_tiny_next_file;
    int line = g_tiny_next_line;
    void* ra0 = g_tiny_next_ra0;
    void* ra1 = g_tiny_next_ra1;
    void* ra2 = g_tiny_next_ra2;
    g_tiny_next_tag = NULL;
    g_tiny_next_file = NULL;
    g_tiny_next_line = 0;
    g_tiny_next_ra0 = NULL;
    g_tiny_next_ra1 = NULL;
    g_tiny_next_ra2 = NULL;
    if (!tag) return;
    if (g_nextlog_tag_filter && !strstr(tag, g_nextlog_tag_filter)) return;

    // Global shot counter caps total output at 256 records per process.
    static _Atomic uint32_t g_nextlog_shot = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_nextlog_shot, 1, memory_order_relaxed);
    if (shot >= 256) return;

    // Cross-check against superslab metadata: the slab's recorded class_idx
    // (meta_cls) should agree with the class the caller passed in.
    SuperSlab* ss = hak_super_lookup(base);
    int cap = ss ? ss_slabs_capacity(ss) : 0;
    int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, base) : -1;
    uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
    void* ra = __builtin_return_address(0);
    fprintf(stderr,
            "[TINY_NEXT_STORE] shot=%u cls=%d base=%p next=%p off=%zu ss=%p idx=%d meta_cls=%u caller=%p tag=%s site=%s:%d ra0=%p ra1=%p ra2=%p\n",
            shot + 1,
            class_idx,
            base,
            next,
            off,
            (void*)ss,
            idx,
            (unsigned)cls,
            ra,
            tag,
            file,
            line,
            ra0,
            ra1,
            ra2);
    // Early frames for offline addr2line when caller symbols are missing
    if (shot < 24) {
        void* bt[16];
        int frames = backtrace(bt, 16);
        backtrace_symbols_fd(bt, frames, fileno(stderr));
    }
    // Backtrace only for clearly misaligned bases (likely user pointers)
    if (((uintptr_t)base & 0xF) != 0) {
        static _Atomic uint32_t g_next_bt = 0;
        uint32_t bt_shot = atomic_fetch_add_explicit(&g_next_bt, 1, memory_order_relaxed);
        if (bt_shot < 8) {
            void* bt[16];
            int frames = backtrace(bt, 16);
            backtrace_symbols_fd(bt, frames, fileno(stderr));
        }
    }
}
#else
// Release build: no-op (triage logging disabled)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    (void)class_idx;
    (void)base;
    (void)next;
    (void)off;
}
#endif
// Safe load of the next pointer from a block base.
// For off == 0 the slot is an aligned void* at base; for off != 0 the slot
// sits one byte past the preserved header, so memcpy is used to avoid UB on
// architectures that forbid unaligned loads. An acquire fence after the load
// (P3) prevents the compiler from reordering it.
static inline __attribute__((always_inline)) void* tiny_next_load(const void* base, int class_idx) {
    const size_t off = tiny_next_off(class_idx);
    void* result = NULL;

    if (off == 0) {
        // Aligned access at base (headerless mode, or C7 freelist slot).
        result = *(void* const*)base;
    } else {
        // C0-C6 with header preserved: copy from base+off byte-wise.
        const uint8_t* src = (const uint8_t*)base + off;
        memcpy(&result, src, sizeof(void*));
    }

    // P3: prevent the compiler from reordering this load.
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
    return result;
}
// Safe store of the next pointer into a block base.
// P2.3: header restoration is conditional (default: skipped when class_map is
// active, because the alloc path always writes a fresh header before handing
// the block to the user via HAK_RET_ALLOC).
//   ENV: HAKMEM_TINY_RESTORE_HEADER=1 forces the legacy restore path.
// P0.1: C0 uses offset 0 (overwrites header), C1-C6 use offset 1 (header preserved).
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1).
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
    const size_t off = tiny_next_off(class_idx);

#if HAKMEM_TINY_HEADERLESS
    // Headerless mode: there is never a header to restore.
    (void)class_idx;
#elif HAKMEM_TINY_HEADER_CLASSIDX
    // P2.3: skip header restoration by default (class_map is now the default
    // class_idx lookup); the env gate below re-enables the legacy fallback.
    if (off != 0) {
        static int g_restore_header = -1; // -1 = env not read yet
        if (__builtin_expect(g_restore_header == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_restore_header, 0)) {
            // Legacy mode: re-write the header for classes that preserve it (C0-C6).
            tiny_header_write_if_preserved(base, class_idx);
        }
    }
#endif

    if (off == 0) {
        // Aligned store at base (overwrites the header for C7).
        *(void**)base = next;
    } else {
        // Header-preserving classes: write at base+off via memcpy for
        // portability / UB-avoidance on unaligned addresses.
        uint8_t* dst = (uint8_t*)base + off;
        memcpy(dst, &next, sizeof(void*));
    }
    tiny_next_store_log(class_idx, base, next, off);
}

#endif // TINY_NEXTPTR_H