Files
hakmem/core/tiny_nextptr.h
Moe Charm (CI) d54893ea1d Phase 3 C3: Static Routing A/B Test ADOPT (+2.20% Mixed gain)
Step 2 & 3 Complete:
- A/B test (Mixed 10-run): STATIC_ROUTE=0 (38.91M) → =1 (39.77M) = +2.20% avg
  - Median gain: +1.98%
  - Result:  GO (exceeds +1.0% threshold)

- Decision:  ADOPT into MIXED_TINYV3_C7_SAFE preset
  - bench_profile.h line 77: HAKMEM_TINY_STATIC_ROUTE=1 default
  - Learner auto-disables static route when HAKMEM_SMALL_LEARNER_V7_ENABLED=1

Implementation Summary:
- core/box/tiny_static_route_box.{h,c}: Research box (Step 1A)
- core/front/malloc_tiny_fast.h: Route lookup integration (Step 1B, lines 249-256)
- core/bench_profile.h: Bench sync + preset adoption

Cumulative Phase 2-3 Gains:
- B3 (Routing shape): +2.89%
- B4 (Wrapper split): +1.47%
- C3 (Static routing): +2.20%
- Total: ~6.8% (35.2M → ~39.8M ops/s)

Next: Phase 3 C1 (TLS Prefetch, expected +2-4%)

🤖 Generated with Claude Code

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-13 18:46:11 +09:00

226 lines
8.6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
//
// Finalized Phase E1-CORRECT spec (物理制約込み):
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
//
// HAKMEM_TINY_HEADER_CLASSIDX != 0 のとき:
//
// Class 0:
// [1B header][7B payload] (total 8B stride)
// → 8B stride に 1B header + 8B next pointer は収まらない1B溢れる
// → next は base+0 に格納headerを上書き
// → next_off = 0
//
// Class 1〜6:
// [1B header][payload >= 15B] (stride >= 16B)
// → headerは保持し、next は header直後 base+1 に格納
// → next_off = 1
//
// Class 7:
// [1B header][payload 2047B]
// → headerは上書きし、next は base+0 に格納(最大サイズなので許容)
// → next_off = 0
//
// HAKMEM_TINY_HEADER_CLASSIDX == 0 のとき:
//
// 全クラス headerなし → next_off = 0
//
// このヘッダは上記仕様を唯一の真実として提供する。
// すべての tiny freelist / TLS / fast-cache / refill / SLL で
// tiny_next_off/tiny_next_load/tiny_next_store を経由すること。
// 直接の *(void**) アクセスやローカルな offset 分岐は使用禁止。
#ifndef TINY_NEXTPTR_H
#define TINY_NEXTPTR_H
#include <stdint.h>
#include <string.h>
#include <stdlib.h> // P2.3: for getenv()
#include "hakmem_build_flags.h"
#include "tiny_region_id.h" // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
#include "hakmem_super_registry.h" // hak_super_lookup
#include "superslab/superslab_inline.h" // slab_index_for
#include <stdio.h>
#include <stdatomic.h>
#include <dlfcn.h>
#include <execinfo.h> // backtrace for rare misalign diagnostics
#include "box/tiny_layout_box.h"
#include "box/tiny_header_box.h"
// Per-thread trace context injected by PTR_NEXT_WRITE macro (for triage)
static __thread const char* g_tiny_next_tag __attribute__((unused)) = NULL;
static __thread const char* g_tiny_next_file __attribute__((unused)) = NULL;
static __thread int g_tiny_next_line __attribute__((unused)) = 0;
static __thread void* g_tiny_next_ra0 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra1 __attribute__((unused)) = NULL;
static __thread void* g_tiny_next_ra2 __attribute__((unused)) = NULL;
// Compute freelist next-pointer offset within a block for the given class.
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
// Rationale for C0: 8B stride cannot fit [1B header][8B next pointer] without overflow
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
return tiny_nextptr_offset(class_idx);
}
#if !HAKMEM_BUILD_RELEASE
// Optional: log next-pointer writes for triage (env: HAKMEM_TINY_SLL_HEADLOG=1)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
static int g_nextlog_en = 1; // default ON for triage; disable with HAKMEM_TINY_SLL_HEADLOG=0
static int g_nextlog_env_checked = 0;
static int g_nextlog_cls = -2; // -1 = no filter; >=0 = only that class
static const char* g_nextlog_tag_filter = NULL; // substring match; NULL = no filter
if (!g_nextlog_env_checked) {
const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
if (e && *e == '0') {
g_nextlog_en = 0;
}
const char* c = getenv("HAKMEM_TINY_SLL_NEXTCLS");
if (c && *c) {
g_nextlog_cls = atoi(c);
} else {
g_nextlog_cls = -1;
}
g_nextlog_tag_filter = getenv("HAKMEM_TINY_SLL_NEXTTAG");
g_nextlog_env_checked = 1;
}
if (!__builtin_expect(g_nextlog_en, 0)) return;
if (g_nextlog_cls >= 0 && class_idx != g_nextlog_cls) return;
// Pull tag/callsite from TLS and clear immediately to avoid stale reuse
const char* tag = g_tiny_next_tag;
const char* file = g_tiny_next_file;
int line = g_tiny_next_line;
void* ra0 = g_tiny_next_ra0;
void* ra1 = g_tiny_next_ra1;
void* ra2 = g_tiny_next_ra2;
g_tiny_next_tag = NULL;
g_tiny_next_file = NULL;
g_tiny_next_line = 0;
g_tiny_next_ra0 = NULL;
g_tiny_next_ra1 = NULL;
g_tiny_next_ra2 = NULL;
if (!tag) return;
if (g_nextlog_tag_filter && !strstr(tag, g_nextlog_tag_filter)) return;
static _Atomic uint32_t g_nextlog_shot = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_nextlog_shot, 1, memory_order_relaxed);
if (shot >= 256) return;
SuperSlab* ss = hak_super_lookup(base);
int cap = ss ? ss_slabs_capacity(ss) : 0;
int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, base) : -1;
uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
void* ra = __builtin_return_address(0);
fprintf(stderr,
"[TINY_NEXT_STORE] shot=%u cls=%d base=%p next=%p off=%zu ss=%p idx=%d meta_cls=%u caller=%p tag=%s site=%s:%d ra0=%p ra1=%p ra2=%p\n",
shot + 1,
class_idx,
base,
next,
off,
(void*)ss,
idx,
(unsigned)cls,
ra,
tag,
file,
line,
ra0,
ra1,
ra2);
// Early frames for offline addr2line when caller symbols are missing
if (shot < 24) {
void* bt[16];
int frames = backtrace(bt, 16);
backtrace_symbols_fd(bt, frames, fileno(stderr));
}
// Backtrace only for clearly misaligned bases (likely user pointers)
if (((uintptr_t)base & 0xF) != 0) {
static _Atomic uint32_t g_next_bt = 0;
uint32_t bt_shot = atomic_fetch_add_explicit(&g_next_bt, 1, memory_order_relaxed);
if (bt_shot < 8) {
void* bt[16];
int frames = backtrace(bt, 16);
backtrace_symbols_fd(bt, frames, fileno(stderr));
}
}
}
#else
// Release build: no-op (triage logging disabled)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
(void)class_idx;
(void)base;
(void)next;
(void)off;
}
#endif
// Safe load of next pointer from a block base.
static inline __attribute__((always_inline)) void* tiny_next_load(const void* base, int class_idx) {
size_t off = tiny_next_off(class_idx);
if (off == 0) {
// Aligned access at base (header無し or C7 freelist時)
void* next = *(void* const*)base;
// P3: Prevent compiler from reordering this load
__atomic_thread_fence(__ATOMIC_ACQUIRE);
return next;
}
// off != 0: use memcpy to avoid UB on architectures that forbid unaligned loads.
// C0-C6: offset 1 (header preserved)
void* next = NULL;
const uint8_t* p = (const uint8_t*)base + off;
memcpy(&next, p, sizeof(void*));
// P3: Prevent compiler from reordering this load
__atomic_thread_fence(__ATOMIC_ACQUIRE);
return next;
}
// Safe store of next pointer into a block base.
// P2.3: Header restoration is now conditional (default: skip when class_map is active)
// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
// - Alloc path always writes fresh header before returning block to user (HAK_RET_ALLOC)
// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
// P0.1: C7 uses offset 0 (overwrites header), C0-C6 use offset 1 (header preserved)
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
size_t off = tiny_next_off(class_idx);
#if HAKMEM_TINY_HEADERLESS
// Headerless mode: never restore header
(void)class_idx;
#elif HAKMEM_TINY_HEADER_CLASSIDX
// P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
// ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
if (off != 0) {
static int g_restore_header = -1;
if (__builtin_expect(g_restore_header == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
g_restore_header = (e && *e && *e != '0') ? 1 : 0;
}
if (__builtin_expect(g_restore_header, 0)) {
// Legacy mode: Restore header for classes that preserve it (C0-C6)
tiny_header_write_if_preserved(base, class_idx);
}
}
#endif
if (off == 0) {
// Aligned access at base (overwrites header for C7).
*(void**)base = next;
tiny_next_store_log(class_idx, base, next, off);
return;
}
// off != 0: use memcpy for portability / UB-avoidance.
uint8_t* p = (uint8_t*)base + off;
memcpy(p, &next, sizeof(void*));
tiny_next_store_log(class_idx, base, next, off);
}
#endif // TINY_NEXTPTR_H