Files
hakmem/core/tiny_nextptr.h
Moe Charm (CI) 984cca41ef P0 Optimization: Shared Pool fast path with O(1) metadata lookup
Performance Results:
- Throughput: 2.66M ops/s → 3.8M ops/s (+43% improvement)
- sp_meta_find_or_create: O(N) linear scan → O(1) direct pointer
- Stage 2 full metadata scans: 100% → 10-20% of acquire calls (80-90% reduction via class hints)

Core Optimizations:

1. O(1) Metadata Lookup (superslab_types.h)
   - Added `shared_meta` pointer field to SuperSlab struct
   - Eliminates O(N) linear search through ss_metadata[] array
   - First access: O(N) scan + cache | Subsequent: O(1) direct return

2. sp_meta_find_or_create Fast Path (hakmem_shared_pool.c)
   - Check the cached ss->shared_meta pointer before falling back to the linear scan (sketched below)
   - Cache pointer after successful linear scan for future lookups
   - Reduces 7.8% CPU hotspot to near-zero for hot paths

3. Stage 2 Class Hints Fast Path (hakmem_shared_pool_acquire.c)
   - Try class_hints[class_idx] first, falling back to the full metadata scan only on a miss
   - Uses O(1) ss->shared_meta lookup for hint validation
   - __builtin_expect() for branch prediction optimization
   - 80-90% of acquire calls now skip full metadata scan

4. Proper Initialization (ss_allocation_box.c)
   - Initialize shared_meta = NULL in superslab_allocate()
   - Ensures correct NULL-check semantics for new SuperSlabs
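
A minimal sketch of the cached-pointer fast path from items 1-2; SharedPoolMeta and sp_meta_scan_or_create are assumed names for illustration, not the actual HAKMEM symbols:

    /* Sketch only: type and helper names are assumptions, not the real code. */
    typedef struct SharedPoolMeta SharedPoolMeta;
    typedef struct SuperSlab {
        SharedPoolMeta* shared_meta;   /* O(1) cache added by this change */
        /* ... other fields ... */
    } SuperSlab;

    static SharedPoolMeta* sp_meta_find_or_create(SuperSlab* ss) {
        /* Fast path: return the pointer cached on the SuperSlab. */
        if (__builtin_expect(ss->shared_meta != NULL, 1))
            return ss->shared_meta;
        /* Slow path (under the shared-pool mutex): one O(N) scan or create,
         * then cache the result so every later call is O(1). */
        SharedPoolMeta* m = sp_meta_scan_or_create(ss);   /* hypothetical helper */
        ss->shared_meta = m;
        return m;
    }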

Additional Improvements:
- Updated ptr_trace and debug ring for release build efficiency
- Enhanced ENV variable documentation and analysis
- Added learner_env_box.h for configuration management
- Various Box optimizations for reduced overhead

Thread Safety:
- All atomic operations use correct memory ordering
- shared_meta cached under mutex protection
- Lock-free Stage 2 uses proper CAS with acquire/release semantics (sketched below)
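
A minimal sketch of the acquire/release CAS idiom the lock-free Stage 2 path relies on; Node and free_head are hypothetical stand-ins, not the actual structures:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct Node { struct Node* next; } Node;  /* hypothetical */
    static _Atomic(Node*) free_head;

    static Node* stage2_pop(void) {
        Node* old = atomic_load_explicit(&free_head, memory_order_acquire);
        while (old != NULL &&
               !atomic_compare_exchange_weak_explicit(
                   &free_head, &old, old->next,
                   memory_order_acq_rel,     /* success ordering */
                   memory_order_acquire)) {  /* failure: old is reloaded; retry */
        }
        return old;
    }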

Testing:
- Benchmark: 1M iterations, 3.8M ops/s stable
- Build: clean compile with RELEASE=0 and RELEASE=1
- No crashes, memory leaks, or correctness issues

Next Optimization Candidates:
- P1: Per-SuperSlab free slot bitmap for O(1) slot claiming (sketched below)
- P2: Reduce Stage 2 critical section size
- P3: Page pre-faulting (MAP_POPULATE)
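
For the P1 candidate, a minimal sketch (hypothetical names, 64 slots assumed) of O(1) slot claiming from a per-SuperSlab free-slot bitmap via find-first-set plus CAS:

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint64_t free_slots;  /* hypothetical: bit i set => slot i free */

    static int claim_slot(void) {
        uint64_t bits = atomic_load_explicit(&free_slots, memory_order_acquire);
        while (bits != 0) {
            int idx = __builtin_ctzll(bits);        /* lowest free slot */
            uint64_t want = bits & ~(1ULL << idx);  /* clear its bit */
            if (atomic_compare_exchange_weak_explicit(
                    &free_slots, &bits, want,
                    memory_order_acq_rel, memory_order_acquire))
                return idx;  /* slot idx claimed */
            /* bits was refreshed by the failed CAS; retry */
        }
        return -1;  /* no free slot in this SuperSlab */
    }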

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-04 16:21:54 +09:00

// tiny_nextptr.h - Authoritative next-pointer offset/load/store for tiny boxes
//
// Finalized Phase E1-CORRECT spec (including physical constraints):
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
//
// When HAKMEM_TINY_HEADER_CLASSIDX != 0:
//
// Class 0:
//   [1B header][7B payload] (total 8B stride)
//   → a 1B header plus an 8B next pointer does not fit in the 8B stride (1B overflow)
//   → next is stored at base+0 (overwrites the header)
//   → next_off = 0
//
// Class 1-6:
//   [1B header][payload >= 15B] (stride >= 16B)
//   → the header is preserved; next is stored right after it, at base+1
//   → next_off = 1
//
// Class 7:
//   [1B header][payload 2047B]
//   → the header is overwritten; next is stored at base+0 (acceptable for the largest class)
//   → next_off = 0
//
// When HAKMEM_TINY_HEADER_CLASSIDX == 0:
//
//   All classes are headerless → next_off = 0
//
// This header is the single source of truth for the spec above.
// Every tiny freelist / TLS / fast-cache / refill / SLL path must go through
// tiny_next_off / tiny_next_load / tiny_next_store.
// Direct *(void**) accesses and local offset branches are forbidden.
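//
// Usage sketch (illustrative only; `head`, `blk`, and `cls` are hypothetical
// caller-side variables, not part of this header): a per-class singly linked
// freelist pushes and pops exclusively through these helpers.
//
//   // push: link blk in front of the current freelist head
//   tiny_next_store(blk, cls, head);
//   head = blk;
//
//   // pop: take the head block and reload the stored next pointer
//   void* popped = head;
//   head = tiny_next_load(popped, cls);
//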
#ifndef TINY_NEXTPTR_H
#define TINY_NEXTPTR_H
#include <stdint.h>
#include <string.h>
#include <stdlib.h> // P2.3: for getenv()
#include "hakmem_build_flags.h"
#include "tiny_region_id.h" // HEADER_MAGIC/HEADER_CLASS_MASK for header repair/logging
#include "hakmem_super_registry.h" // hak_super_lookup
#include "superslab/superslab_inline.h" // slab_index_for
#include <stdio.h>
#include <stdatomic.h>
#include <dlfcn.h>
#include <execinfo.h> // backtrace for rare misalign diagnostics
#include "box/tiny_layout_box.h"
#include "box/tiny_header_box.h"
// Per-thread trace context injected by PTR_NEXT_WRITE macro (for triage)
static __thread const char* g_tiny_next_tag = NULL;
static __thread const char* g_tiny_next_file = NULL;
static __thread int g_tiny_next_line = 0;
static __thread void* g_tiny_next_ra0 = NULL;
static __thread void* g_tiny_next_ra1 = NULL;
static __thread void* g_tiny_next_ra2 = NULL;
// Compute freelist next-pointer offset within a block for the given class.
// P0.1 updated: C0 and C7 use offset 0, C1-C6 use offset 1 (header preserved)
// Rationale for C0: 8B stride cannot fit [1B header][8B next pointer] without overflow
static inline __attribute__((always_inline)) size_t tiny_next_off(int class_idx) {
    return tiny_user_offset(class_idx);
}
#if !HAKMEM_BUILD_RELEASE
// Optional: log next-pointer writes for triage (env: HAKMEM_TINY_SLL_HEADLOG=1)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    static int g_nextlog_en = 1; // default ON for triage; disable with HAKMEM_TINY_SLL_HEADLOG=0
    static int g_nextlog_env_checked = 0;
    static int g_nextlog_cls = -2; // -1 = no filter; >=0 = only that class
    static const char* g_nextlog_tag_filter = NULL; // substring match; NULL = no filter
    if (!g_nextlog_env_checked) {
        const char* e = getenv("HAKMEM_TINY_SLL_HEADLOG");
        if (e && *e == '0') {
            g_nextlog_en = 0;
        }
        const char* c = getenv("HAKMEM_TINY_SLL_NEXTCLS");
        if (c && *c) {
            g_nextlog_cls = atoi(c);
        } else {
            g_nextlog_cls = -1;
        }
        g_nextlog_tag_filter = getenv("HAKMEM_TINY_SLL_NEXTTAG");
        g_nextlog_env_checked = 1;
    }
    if (!__builtin_expect(g_nextlog_en, 0)) return;
    if (g_nextlog_cls >= 0 && class_idx != g_nextlog_cls) return;
    // Pull tag/callsite from TLS and clear immediately to avoid stale reuse
    const char* tag = g_tiny_next_tag;
    const char* file = g_tiny_next_file;
    int line = g_tiny_next_line;
    void* ra0 = g_tiny_next_ra0;
    void* ra1 = g_tiny_next_ra1;
    void* ra2 = g_tiny_next_ra2;
    g_tiny_next_tag = NULL;
    g_tiny_next_file = NULL;
    g_tiny_next_line = 0;
    g_tiny_next_ra0 = NULL;
    g_tiny_next_ra1 = NULL;
    g_tiny_next_ra2 = NULL;
    if (!tag) return;
    if (g_nextlog_tag_filter && !strstr(tag, g_nextlog_tag_filter)) return;
    static _Atomic uint32_t g_nextlog_shot = 0;
    uint32_t shot = atomic_fetch_add_explicit(&g_nextlog_shot, 1, memory_order_relaxed);
    if (shot >= 256) return;
    SuperSlab* ss = hak_super_lookup(base);
    int cap = ss ? ss_slabs_capacity(ss) : 0;
    int idx = (ss && ss->magic == SUPERSLAB_MAGIC) ? slab_index_for(ss, base) : -1;
    uint8_t cls = (idx >= 0 && idx < cap) ? ss->slabs[idx].class_idx : 0xff;
    void* ra = __builtin_return_address(0);
    fprintf(stderr,
            "[TINY_NEXT_STORE] shot=%u cls=%d base=%p next=%p off=%zu ss=%p idx=%d meta_cls=%u caller=%p tag=%s site=%s:%d ra0=%p ra1=%p ra2=%p\n",
            shot + 1, class_idx, base, next, off,
            (void*)ss, idx, (unsigned)cls, ra,
            tag, file, line, ra0, ra1, ra2);
    // Early frames for offline addr2line when caller symbols are missing
    if (shot < 24) {
        void* bt[16];
        int frames = backtrace(bt, 16);
        backtrace_symbols_fd(bt, frames, fileno(stderr));
    }
    // Backtrace only for clearly misaligned bases (likely user pointers)
    if (((uintptr_t)base & 0xF) != 0) {
        static _Atomic uint32_t g_next_bt = 0;
        uint32_t bt_shot = atomic_fetch_add_explicit(&g_next_bt, 1, memory_order_relaxed);
        if (bt_shot < 8) {
            void* bt[16];
            int frames = backtrace(bt, 16);
            backtrace_symbols_fd(bt, frames, fileno(stderr));
        }
    }
}
#else
// Release build: no-op (triage logging disabled)
static inline void tiny_next_store_log(int class_idx, void* base, void* next, size_t off)
{
    (void)class_idx;
    (void)base;
    (void)next;
    (void)off;
}
#endif
// Safe load of next pointer from a block base.
static inline __attribute__((always_inline)) void* tiny_next_load(const void* base, int class_idx) {
    size_t off = tiny_next_off(class_idx);
    if (off == 0) {
        // Aligned access at base (headerless mode, or C0/C7 freelist case)
        void* next = *(void* const*)base;
        // P3: Prevent compiler from reordering this load
        __atomic_thread_fence(__ATOMIC_ACQUIRE);
        return next;
    }
    // off != 0: use memcpy to avoid UB on architectures that forbid unaligned loads.
    // C1-C6: offset 1 (header preserved)
    void* next = NULL;
    const uint8_t* p = (const uint8_t*)base + off;
    memcpy(&next, p, sizeof(void*));
    // P3: Prevent compiler from reordering this load
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
    return next;
}
// Safe store of next pointer into a block base.
// P2.3: Header restoration is now conditional (default: skip when class_map is active)
// - When class_map is used for class_idx lookup (default), header restoration is unnecessary
// - Alloc path always writes fresh header before returning block to user (HAK_RET_ALLOC)
// - ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy mode)
// P0.1: C0 and C7 use offset 0 (header overwritten), C1-C6 use offset 1 (header preserved)
static inline __attribute__((always_inline)) void tiny_next_store(void* base, int class_idx, void* next) {
    size_t off = tiny_next_off(class_idx);
#if HAKMEM_TINY_HEADERLESS
    // Headerless mode: never restore header
    (void)class_idx;
#elif HAKMEM_TINY_HEADER_CLASSIDX
    // P2.3: Skip header restoration by default (class_map is now default for class_idx lookup)
    // ENV: HAKMEM_TINY_RESTORE_HEADER=1 to force header restoration (legacy fallback mode)
    if (off != 0) {
        static int g_restore_header = -1;
        if (__builtin_expect(g_restore_header == -1, 0)) {
            const char* e = getenv("HAKMEM_TINY_RESTORE_HEADER");
            g_restore_header = (e && *e && *e != '0') ? 1 : 0;
        }
        if (__builtin_expect(g_restore_header, 0)) {
            // Legacy mode: restore the header for classes that preserve it (C1-C6)
            tiny_header_write_if_preserved(base, class_idx);
        }
    }
#endif
    if (off == 0) {
        // Aligned access at base (overwrites the header for C0/C7).
        *(void**)base = next;
        tiny_next_store_log(class_idx, base, next, off);
        return;
    }
    // off != 0: use memcpy for portability / UB-avoidance.
    uint8_t* p = (uint8_t*)base + off;
    memcpy(p, &next, sizeof(void*));
    tiny_next_store_log(class_idx, base, next, off);
}
#endif // TINY_NEXTPTR_H