hakmem/core/tiny_nextptr.h
commit cbb35ee27f by Moe Charm (CI): Phase 13 v1 + E5-2 retest: Both NEUTRAL, freeze as research boxes
Phase 13 v1: Header Write Elimination (C7 preserve header)
- Verdict: NEUTRAL (+0.78%)
- Implementation: HAKMEM_TINY_C7_PRESERVE_HEADER ENV gate (default OFF; gate pattern sketched below)
- Makes C7 nextptr offset conditional (0→1 when enabled)
- 4-point matrix A/B test results:
  * Case A (baseline): 51.49M ops/s
  * Case B (WRITE_ONCE=1): 52.07M ops/s (+1.13%)
  * Case C (C7_PRESERVE=1): 51.36M ops/s (-0.26%)
  * Case D (both): 51.89M ops/s (+0.78% NEUTRAL)
- Action: Freeze as research box (default OFF, manual opt-in)
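
ENV gate sketch (illustrative): the gate follows the cached-getenv pattern
already used in this header (e.g. HAKMEM_TINY_RESTORE_HEADER in
tiny_next_store). The helper name below is hypothetical; the real one lives
in tiny_c7_preserve_header_env_box.c:

    #include <stdlib.h> // getenv

    // Cached ENV gate: HAKMEM_TINY_C7_PRESERVE_HEADER is read once, default OFF.
    static int g_c7_preserve = -1; // -1 = ENV not read yet

    static inline int tiny_c7_preserve_header_enabled(void) {
        if (__builtin_expect(g_c7_preserve == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_C7_PRESERVE_HEADER");
            g_c7_preserve = (e && *e && *e != '0') ? 1 : 0; // default OFF
        }
        return g_c7_preserve;
    }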

Phase 5 E5-2: Header Write-Once retest (promotion test)
- Verdict: NEUTRAL (+0.54%)
- Motivation: Phase 13 Case B showed +1.13%; re-tested with a dedicated 20-run benchmark
- Results (20-run):
  * Case A (baseline): 51.10M ops/s
  * Case B (WRITE_ONCE=1): 51.37M ops/s (+0.54%)
- Previous test: +0.45% (consistent with NEUTRAL)
- Action: Keep as research box (default OFF, manual opt-in)

Key findings:
- Header-write-tax optimizations show consistently NEUTRAL results
- Neither Phase 13 v1 nor E5-2 reaches the GO threshold (+1.0%)
- Both implemented as reversible ENV gates for future research

Files changed:
- New: core/box/tiny_c7_preserve_header_env_box.{c,h}
- Modified: core/box/tiny_layout_box.h (C7 offset conditional)
- Modified: core/tiny_nextptr.h, core/box/tiny_header_box.h (comments)
- Modified: core/bench_profile.h (refresh sync)
- Modified: Makefile (add new .o files)
- Modified: scripts/run_mixed_10_cleanenv.sh (add C7_PRESERVE ENV)
- Docs: PHASE13_*, PHASE5_E5_2_HEADER_WRITE_ONCE_* (design/results)

Next: Phase 14 (Pointer-chase reduction, tcache-style intrusive LIFO)
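
A minimal sketch of the tcache-style intrusive LIFO idea (illustrative only;
Phase 14 is future work and none of this code is in the tree). The link
pointer lives inside the free block itself, so push and pop each touch one
cache line and pop costs a single dependent load:

    typedef struct tc_entry { struct tc_entry* next; } tc_entry;

    static inline void tc_push(tc_entry** head, void* blk) {
        tc_entry* e = (tc_entry*)blk; // link stored inside the free block
        e->next = *head;
        *head = e;
    }

    static inline void* tc_pop(tc_entry** head) {
        tc_entry* e = *head;
        if (e) *head = e->next; // one dependent load per pop
        return (void*)e;
    }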

🤖 Generated with Claude Code

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-15 00:32:25 +09:00


// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
//
// Finalized Phase E1-CORRECT spec (including physical constraints):
// P0.1 updated: C0 uses offset 0, C1-C6 use offset 1 (header preserved)
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1)
//
// When HAKMEM_TINY_HEADER_CLASSIDX != 0:
//
// Class 0:
// [1B header][7B payload] (total 8B stride)
// → an 8B stride cannot fit a 1B header plus an 8B next pointer (1B overflow)
// → next is stored at base+0 (overwrites the header)
// → next_off = 0
//
// Classes 1-6:
// [1B header][payload >= 15B] (stride >= 16B)
// → header is preserved; next is stored immediately after it at base+1
// → next_off = 1
//
// Class 7:
// [1B header][payload 2047B]
// → next_off = 0 (default: header is overwritten)
// → next_off = 1 (Phase 13 v1: HAKMEM_TINY_C7_PRESERVE_HEADER=1)
//
// When HAKMEM_TINY_HEADER_CLASSIDX == 0:
//
// All classes are headerless → next_off = 0
//
// This header is the single source of truth for the spec above.
// All tiny freelist / TLS / fast-cache / refill / SLL code must go through
// tiny_next_off / tiny_next_load / tiny_next_store.
// Direct *(void**) access and local per-callsite offset branching are forbidden.
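// Worked example (illustrative, derived from the spec above): a class-1 block
// with a 16B stride, viewed while it sits on a freelist:
//   base+0     : 1B header (preserved)
//   base+1..8  : 8B next pointer, written/read via memcpy (may be unaligned)
//   base+9..15 : remaining payload bytes (don't-care while free)
// Hence tiny_next_off(1) == 1, whereas tiny_next_off(0) == 0 and the C0
// header is overwritten by the next pointer.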
#ifndef TINY_NEXTPTR_H
#define TINY_NEXTPTR_H
#include <stdint.h>
#include <string.h>
#include <stdlib.h> // P2.3: for getenv()
#include "hakmem_build_flags.h"
#include "tiny_region_id.h" // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
#include "hakmem_super_registry.h" // hak_super_lookup
#include "superslab/superslab_inline.h" // slab_index_for
#include <stdio.h>
#include <stdatomic.h>
#include <dlfcn.h>
#include <execinfo.h> // backtrace for rare misalign diagnostics
#include "box/tiny_layout_box.h"
#include "box/tiny_header_box.h"
// Per-thread trace context injected by PTR_NEXT_WRITE macro (for triage)
static __thread const char* g_tiny_next_tag __attribute__((unused)) = NULL;
static __thread const char* g_tiny_next_file __attribute__((unused)) = NULL;
static __thread int g_tiny_next_line __attribute__((unused)) = 0;
static __thread void* g_tiny_next_ra0 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra1 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra2 __attribute__((unused)) = NULL;
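// The PTR_NEXT_WRITE macro itself is defined elsewhere; a hedged sketch of
// the shape expected to fill the TLS slots above (illustrative only, not the
// real definition):
//
//   #define PTR_NEXT_WRITE(base, cls, next, tag_) do {      \
//       g_tiny_next_tag  = (tag_);                          \
//       g_tiny_next_file = __FILE__;                        \
//       g_tiny_next_line = __LINE__;                        \
//       g_tiny_next_ra0  = __builtin_return_address(0);     \
//       tiny_next_store((base), (cls), (next));             \
//   } while (0)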
// Compute freelist next-pointer offset within a block for the given class.
// P0.1: C0 uses offset 0, C1-C6 use offset 1 (header preserved)
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1)
// Rationale for C0: 8B stride cannot fit [1B header][8B next pointer] without overflow
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
    return tiny_nextptr_offset(class_idx);
}
#if !HAKMEM_BUILD_RELEASE
// Optional: log next-pointer writes for triage (default ON in debug builds;
// env: HAKMEM_TINY_SLL_HEADLOG=0 to disable)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    static int g_nextlog_en = 1; // default ON for triage; disable with HAKMEM_TINY_SLL_HEADLOG=0
    static int g_nextlog_env_checked = 0;
    static int g_nextlog_cls = -2; // -1 = no filter; >=0 = only that class
    static const char* g_nextlog_tag_filter = NULL; // substring match; NULL = no filter
    if (!g_nextlog_env_checked) {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        if (e && *e == '0') {
            g_nextlog_en = 0;
        }
        const char* c = getenv("HAKMEM_TINY_SLL_NEXTCLS");
        if (c && *c) {
            g_nextlog_cls = atoi(c);
        } else {
            g_nextlog_cls = -1;
        }
        g_nextlog_tag_filter = getenv("HAKMEM_TINY_SLL_NEXTTAG");
        g_nextlog_env_checked = 1;
    }
    if (!__builtin_expect(g_nextlog_en, 0)) return;
    if (g_nextlog_cls >= 0 && class_idx != g_nextlog_cls) return;
    // Pull tag/callsite from TLS and clear immediately to avoid stale reuse
    const char* tag = g_tiny_next_tag;
    const char* file = g_tiny_next_file;
    int line = g_tiny_next_line;
    void* ra0 = g_tiny_next_ra0;
    void* ra1 = g_tiny_next_ra1;
    void* ra2 = g_tiny_next_ra2;
    g_tiny_next_tag = NULL;
    g_tiny_next_file = NULL;
    g_tiny_next_line = 0;
    g_tiny_next_ra0 = NULL;
    g_tiny_next_ra1 = NULL;
    g_tiny_next_ra2 = NULL;
    if (!tag) return;
    if (g_nextlog_tag_filter && !strstr(tag, g_nextlog_tag_filter)) return;
    static _Atomic uint32_t g_nextlog_shot = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_nextlog_shot, 1, memory_order_relaxed);
    if (shot >= 256) return;
    SuperSlab* ss = hak_super_lookup(base);
    int cap = ss ? ss_slabs_capacity(ss) : 0;
    int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, base) : -1;
    uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
    void* ra = __builtin_return_address(0);
    fprintf(stderr,
            "[TINY_NEXT_STORE] shot=%u cls=%d base=%p next=%p off=%zu ss=%p idx=%d meta_cls=%u caller=%p tag=%s site=%s:%d ra0=%p ra1=%p ra2=%p\n",
            shot + 1, class_idx, base, next, off, (void*)ss, idx, (unsigned)cls,
            ra, tag, file, line, ra0, ra1, ra2);
    // Early frames for offline addr2line when caller symbols are missing
    if (shot < 24) {
        void* bt[16];
        int frames = backtrace(bt, 16);
        backtrace_symbols_fd(bt, frames, fileno(stderr));
    }
    // Backtrace only for clearly misaligned bases (likely user pointers)
    if (((uintptr_t)base & 0xF) != 0) {
        static _Atomic uint32_t g_next_bt = 0;
        uint32_t bt_shot = atomic_fetch_add_explicit(&g_next_bt, 1, memory_order_relaxed);
        if (bt_shot < 8) {
            void* bt[16];
            int frames = backtrace(bt, 16);
            backtrace_symbols_fd(bt, frames, fileno(stderr));
        }
    }
}
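// Triage usage (illustrative; the binary name is hypothetical):
//   HAKMEM_TINY_SLL_NEXTCLS=3 HAKMEM_TINY_SLL_NEXTTAG=refill ./bench_mixed
// restricts output to class-3 stores whose tag contains "refill"; the first
// 256 matching stores are printed, with raw backtraces for the first 24.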
#else
// Release build: no-op (triage logging disabled)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    (void)class_idx;
    (void)base;
    (void)next;
    (void)off;
}
#endif
// Safe load of next pointer from a block base.
static inline __attribute__((always_inline)) void* tiny_next_load(const void* base, int class_idx) {
    size_t off = tiny_next_off(class_idx);
    if (off == 0) {
        // Aligned access at base (headerless mode, or C7 while on the freelist)
        void* next = *(void* const*)base;
        // P3: Prevent compiler from reordering this load
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        return next;
    }
    // off != 0: use memcpy to avoid UB on architectures that forbid unaligned loads.
    // C1-C6 (and C7 with PRESERVE_HEADER): offset 1, header preserved
    void* next = NULL;
    const uint8_t* p = (const uint8_t*)base + off;
    memcpy(&next, p, sizeof(void*));
    // P3: Prevent compiler from reordering this load
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
    return next;
}
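// Usage sketch (hypothetical caller; tls_head is illustrative): popping the
// head of a per-class singly-linked freelist must go through this helper:
//
//   void* blk = tls_head[cls];
//   if (blk) tls_head[cls] = tiny_next_load(blk, cls);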
// Safe store of next pointer into a block base.
// P2.3: Header restoration is now conditional (default: skip when class_map is active)
// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
// - Alloc path always writes fresh header before returning block to user (HAK_RET_ALLOC)
// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
// P0.1: C0 uses offset 0 (overwrites header), C1-C6 use offset 1 (header preserved)
// Phase 13 v1: C7 uses offset 0 (default) or 1 (HAKMEM_TINY_C7_PRESERVE_HEADER=1)
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
    size_t off = tiny_next_off(class_idx);
#if HAKMEM_TINY_HEADERLESS
    // Headerless mode: never restore header
    (void)class_idx;
#elif HAKMEM_TINY_HEADER_CLASSIDX
    // P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
    // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
    if (off != 0) {
        static int g_restore_header = -1;
        if (__builtin_expect(g_restore_header == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_restore_header, 0)) {
            // Legacy mode: Restore header for classes that preserve it (C0-C6)
            tiny_header_write_if_preserved(base, class_idx);
        }
    }
#endif
    if (off == 0) {
        // Aligned access at base (overwrites the header for C0 and default C7).
        *(void**)base = next;
        tiny_next_store_log(class_idx, base, next, off);
        return;
    }
    // off != 0: use memcpy for portability / UB-avoidance.
    uint8_t* p = (uint8_t*)base + off;
    memcpy(p, &next, sizeof(void*));
    tiny_next_store_log(class_idx, base, next, off);
}
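// Usage sketch (hypothetical caller; tls_head is illustrative): pushing a
// block onto a per-class singly-linked freelist:
//
//   tiny_next_store(blk, cls, tls_head[cls]);
//   tls_head[cls] = blk;
//
// Call sites that want triage logging set the TLS context first via the
// PTR_NEXT_WRITE macro (see the sketch near the top of this file).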
#endif // TINY_NEXTPTR_H