## Summary - ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え) - core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理 - core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off) - A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy) ## Files Modified - core/box/pool_api.inc.h: pool_zero_mode_box.h include - core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避) - core/hakmem_pool.c: zero mode 参照・制御ロジック - core/box/pool_zero_mode_box.h (新設): enum/getter - CURRENT_TASK.md: Phase ML1 結果記載 ## Test Results | Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement | |-----------|----------------|-----------------|------------| | 10K | 3.06 M ops/s | 3.17 M ops/s | +3.65% | | 1M | 23.71 M ops/s | 27.34 M ops/s | **+15.34%** | 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
226 lines
8.6 KiB
C
226 lines
8.6 KiB
C
// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
//
// Finalized Phase E1-CORRECT spec (物理制約込み):
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
//
// HAKMEM_TINY_HEADER_CLASSIDX != 0 のとき:
//
// Class 0:
//   [1B header][7B payload] (total 8B stride)
//   → 8B stride に 1B header + 8B next pointer は収まらない(1B溢れる)
//   → next は base+0 に格納(headerを上書き)
//   → next_off = 0
//
// Class 1〜6:
//   [1B header][payload >= 15B] (stride >= 16B)
//   → headerは保持し、next は header直後 base+1 に格納
//   → next_off = 1
//
// Class 7:
//   [1B header][payload 2047B]
//   → headerは上書きし、next は base+0 に格納(最大サイズなので許容)
//   → next_off = 0
//
// HAKMEM_TINY_HEADER_CLASSIDX == 0 のとき:
//
// 全クラス headerなし → next_off = 0
//
// このヘッダは上記仕様を唯一の真実として提供する。
// すべての tiny freelist / TLS / fast-cache / refill / SLL で
// tiny_next_off/tiny_next_load/tiny_next_store を経由すること。
// 直接の *(void**) アクセスやローカルな offset 分岐は使用禁止。
||
#ifndef TINY_NEXTPTR_H
|
||
#define TINY_NEXTPTR_H
|
||
|
||
#include <stdint.h>
|
||
#include <string.h>
|
||
#include <stdlib.h> // P2.3: for getenv()
|
||
#include "hakmem_build_flags.h"
|
||
#include "tiny_region_id.h" // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
|
||
#include "hakmem_super_registry.h" // hak_super_lookup
|
||
#include "superslab/superslab_inline.h" // slab_index_for
|
||
#include <stdio.h>
|
||
#include <stdatomic.h>
|
||
#include <dlfcn.h>
|
||
#include <execinfo.h> // backtrace for rare misalign diagnostics
|
||
#include "box/tiny_layout_box.h"
|
||
#include "box/tiny_header_box.h"
|
||
|
||
// Per-thread trace context injected by PTR_NEXT_WRITE macro (for triage).
// These TLS slots carry the call-site tag, file:line, and up to three return
// addresses captured at the store site; tiny_next_store_log() reads them and
// clears them immediately so stale context never leaks into the next record.
static __thread const char* g_tiny_next_tag __attribute__((unused)) = NULL;   // call-site tag string (NULL = no trace context)
static __thread const char* g_tiny_next_file __attribute__((unused)) = NULL;  // source file of the store site
static __thread int g_tiny_next_line __attribute__((unused)) = 0;             // source line of the store site
static __thread void* g_tiny_next_ra0 __attribute__((unused)) = NULL;         // captured return address, frame 0
static __thread void* g_tiny_next_ra1 __attribute__((unused)) = NULL;         // captured return address, frame 1
static __thread void* g_tiny_next_ra2 __attribute__((unused)) = NULL;         // captured return address, frame 2
|
||
|
||
// Compute freelist next-pointer offset within a block for the given class.
|
||
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
|
||
// Rationale for C0: 8B stride cannot fit [1B header][8B next pointer] without overflow
|
||
// Offset (in bytes, from block base) of the freelist next pointer for a class.
// Delegates to the layout box so that every freelist / TLS / fast-cache /
// refill / SLL path agrees on a single source of truth for the offset.
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
    const size_t offset = tiny_user_offset(class_idx);
    return offset;
}
|
||
|
||
#if !HAKMEM_BUILD_RELEASE
|
||
// Optional: log next-pointer writes for triage.
// ENV knobs (read once on first call, cached in function-local statics):
//   HAKMEM_TINY_SLL_HEADLOG=0  disable logging (default is ON for triage)
//   HAKMEM_TINY_SLL_NEXTCLS=N  only log stores for class N (unset = no filter)
//   HAKMEM_TINY_SLL_NEXTTAG=S  only log stores whose TLS tag contains substring S
// At most 256 records are emitted per process; the first 24 also dump a raw
// backtrace, and misaligned bases get up to 8 additional backtraces.
// NOTE(review): the one-time env initialization is not synchronized; the race
// is benign since every thread derives the same values from the environment.
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    static int g_nextlog_en = 1; // default ON for triage; disable with HAKMEM_TINY_SLL_HEADLOG=0
    static int g_nextlog_env_checked = 0;
    static int g_nextlog_cls = -2; // -1 = no filter; >=0 = only that class
    static const char* g_nextlog_tag_filter = NULL; // substring match; NULL = no filter
    if (!g_nextlog_env_checked) {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        if (e && *e == '0') {
            g_nextlog_en = 0;
        }
        const char* c = getenv("HAKMEM_TINY_SLL_NEXTCLS");
        if (c && *c) {
            g_nextlog_cls = atoi(c);
        } else {
            g_nextlog_cls = -1;
        }
        g_nextlog_tag_filter = getenv("HAKMEM_TINY_SLL_NEXTTAG");
        g_nextlog_env_checked = 1;
    }
    if (!__builtin_expect(g_nextlog_en, 0)) return;
    if (g_nextlog_cls >= 0 && class_idx != g_nextlog_cls) return;

    // Pull tag/callsite from TLS and clear immediately to avoid stale reuse
    const char* tag = g_tiny_next_tag;
    const char* file = g_tiny_next_file;
    int line = g_tiny_next_line;
    void* ra0 = g_tiny_next_ra0;
    void* ra1 = g_tiny_next_ra1;
    void* ra2 = g_tiny_next_ra2;
    g_tiny_next_tag = NULL;
    g_tiny_next_file = NULL;
    g_tiny_next_line = 0;
    g_tiny_next_ra0 = NULL;
    g_tiny_next_ra1 = NULL;
    g_tiny_next_ra2 = NULL;
    // Only stores routed through PTR_NEXT_WRITE carry a tag; skip untagged ones.
    if (!tag) return;
    if (g_nextlog_tag_filter && !strstr(tag, g_nextlog_tag_filter)) return;

    // Global shot counter caps total output at 256 records per process.
    static _Atomic uint32_t g_nextlog_shot = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_nextlog_shot, 1, memory_order_relaxed);
    if (shot >= 256) return;

    // Cross-check the caller-supplied class against superslab metadata so a
    // meta_cls mismatch in the log flags a corrupted or foreign pointer.
    SuperSlab* ss = hak_super_lookup(base);
    int cap = ss ? ss_slabs_capacity(ss) : 0;
    int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, base) : -1;
    uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
    void* ra = __builtin_return_address(0);
    fprintf(stderr,
            "[TINY_NEXT_STORE] shot=%u cls=%d base=%p next=%p off=%zu ss=%p idx=%d meta_cls=%u caller=%p tag=%s site=%s:%d ra0=%p ra1=%p ra2=%p\n",
            shot + 1,
            class_idx,
            base,
            next,
            off,
            (void*)ss,
            idx,
            (unsigned)cls,
            ra,
            tag,
            file,
            line,
            ra0,
            ra1,
            ra2);
    // Early frames for offline addr2line when caller symbols are missing
    if (shot < 24) {
        void* bt[16];
        int frames = backtrace(bt, 16);
        backtrace_symbols_fd(bt, frames, fileno(stderr));
    }
    // Backtrace only for clearly misaligned bases (likely user pointers)
    if (((uintptr_t)base & 0xF) != 0) {
        static _Atomic uint32_t g_next_bt = 0;
        uint32_t bt_shot = atomic_fetch_add_explicit(&g_next_bt, 1, memory_order_relaxed);
        if (bt_shot < 8) {
            void* bt[16];
            int frames = backtrace(bt, 16);
            backtrace_symbols_fd(bt, frames, fileno(stderr));
        }
    }
}
|
||
#else
|
||
// Release build: triage logging is compiled out entirely; this stub
// swallows all arguments so call sites need no #ifdef of their own.
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    (void)class_idx; (void)base;
    (void)next;      (void)off;
}
|
||
#endif
|
||
|
||
// Safe load of the freelist next pointer stored inside a block.
// The offset comes from tiny_next_off() (0 when the slot replaces the header,
// 1 when the class header byte is preserved — see the file-header spec), so a
// nonzero offset may be unaligned; that case goes through memcpy to avoid UB
// on architectures that forbid unaligned pointer loads.
static inline __attribute__((always_inline)) void* tiny_next_load(const void* base, int class_idx) {
    const size_t offset = tiny_next_off(class_idx);
    void* result;

    if (offset != 0) {
        // Header byte kept at base: pointer lives at base+offset, copy it out.
        memcpy(&result, (const uint8_t*)base + offset, sizeof result);
    } else {
        // Slot sits at base and is pointer-aligned: a plain load is safe.
        result = *(void* const*)base;
    }

    // P3: acquire fence prevents the compiler from reordering this load.
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
    return result;
}
|
||
|
||
// Safe store of the freelist next pointer into a block base.
// P2.3: Header restoration is now conditional (default: skip when class_map is active)
// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
// - Alloc path always writes fresh header before returning block to user (HAK_RET_ALLOC)
// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
// NOTE(review): this comment said "C7 uses offset 0 ... C0-C6 use offset 1",
// but the file-header spec says C0 AND C7 use offset 0 — confirm which is
// current; tiny_next_off()/tiny_user_offset() is the authoritative value.
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
    size_t off = tiny_next_off(class_idx);

#if HAKMEM_TINY_HEADERLESS
    // Headerless mode: never restore header
    (void)class_idx;
#elif HAKMEM_TINY_HEADER_CLASSIDX
    // P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
    // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
    if (off != 0) {
        // One-time env probe cached in a function-local static. NOTE(review):
        // the init is unsynchronized; the race is benign because all threads
        // derive the same value from the same environment variable.
        static int g_restore_header = -1;
        if (__builtin_expect(g_restore_header == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_restore_header, 0)) {
            // Legacy mode: Restore header for classes that preserve it (C0-C6)
            tiny_header_write_if_preserved(base, class_idx);
        }
    }
#endif

    if (off == 0) {
        // Aligned access at base (overwrites the header byte for this class).
        *(void**)base = next;
        tiny_next_store_log(class_idx, base, next, off);
        return;
    }

    // off != 0: use memcpy for portability / UB-avoidance on unaligned stores.
    uint8_t* p = (uint8_t*)base + off;
    memcpy(p, &next, sizeof(void*));
    tiny_next_store_log(class_idx, base, next, off);
}
|
||
|
||
#endif // TINY_NEXTPTR_H
|