// hakmem/core/hakmem_tiny.c

#include "hakmem_tiny.h"
#include "hakmem_tiny_config.h" // Centralized configuration
#include "hakmem_phase7_config.h" // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
#include "hakmem_tiny_superslab.h" // Phase 6.22: SuperSlab allocator
#include "hakmem_super_registry.h" // Phase 8.2: SuperSlab registry for memory profiling
#include "hakmem_internal.h"
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "hakmem_tiny_magazine.h"
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
// Phase 2B modules
#include "tiny_api.h" // Consolidated: stats_api, query_api, rss_api, registry_api
#include "tiny_tls.h"
#include "tiny_debug.h"
#include "tiny_mmap_gate.h"
#include "tiny_debug_ring.h"
#include "tiny_route.h"
#include "tiny_tls_guard.h"
#include "tiny_ready.h"
#include "hakmem_tiny_tls_list.h"
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive TLS cache sizing
// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
#include "tiny_system.h" // Consolidated: stdio, stdlib, string, etc.
#include "hakmem_prof.h"
#include "hakmem_trace.h" // Optional USDT (perf) tracepoints
extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c
// ============================================================================
// Size class table (Box 3 dependency)
// ============================================================================
// Phase E1-CORRECT: ALL classes have 1-byte header
// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B]
// Usable data = stride - 1 (implicit)
const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
8, // Class 0: 8B total = [Header 1B][Data 7B]
16, // Class 1: 16B total = [Header 1B][Data 15B]
32, // Class 2: 32B total = [Header 1B][Data 31B]
64, // Class 3: 64B total = [Header 1B][Data 63B]
128, // Class 4: 128B total = [Header 1B][Data 127B]
256, // Class 5: 256B total = [Header 1B][Data 255B]
512, // Class 6: 512B total = [Header 1B][Data 511B]
1024 // Class 7: 1024B total = [Header 1B][Data 1023B]
};
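// Illustrative mapping (assuming requests are classed by usable size, i.e.
// stride - 1): a 24B request needs 24+1 = 25B of stride and lands in class 2
// (32B total, 31B usable); a 256B request does not fit class 5 (255B usable)
// and is served from class 6 (512B total, 511B usable).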
// ============================================================================
// PRIORITY 1-4: Integrity Check Counters
// ============================================================================
_Atomic uint64_t g_integrity_check_class_bounds = 0;
_Atomic uint64_t g_integrity_check_freelist = 0;
_Atomic uint64_t g_integrity_check_canary = 0;
_Atomic uint64_t g_integrity_check_header = 0;
// Build-time gate for debug counters (path/ultra). Default OFF.
#ifndef HAKMEM_DEBUG_COUNTERS
#define HAKMEM_DEBUG_COUNTERS 0
#endif
int g_debug_fast0 = 0;
int g_debug_remote_guard = 0;
int g_remote_force_notify = 0;
// Tiny free safety (debug)
int g_tiny_safe_free = 0; // Default OFF for performance; env: HAKMEM_SAFE_FREE=1 turns it ON
int g_tiny_safe_free_strict = 0; // env: HAKMEM_SAFE_FREE_STRICT=1
int g_tiny_force_remote = 0; // env: HAKMEM_TINY_FORCE_REMOTE=1
// Hot-class optimization: enable dedicated class5 (256B) TLS fast path
// Env: HAKMEM_TINY_HOTPATH_CLASS5=1/0 (default: 1)
int g_tiny_hotpath_class5 = 1;
// (moved) tiny_class5_stats_dump is defined later, after TLS vars
// Build-time gate: Minimal Tiny front (bench-only)
static inline int superslab_trace_enabled(void) {
static int g_ss_trace_flag = -1;
if (__builtin_expect(g_ss_trace_flag == -1, 0)) {
const char* tr = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
g_ss_trace_flag = (tr && atoi(tr) != 0) ? 1 : 0;
}
return g_ss_trace_flag;
}
// When enabled, physically excludes optional front tiers from the hot path
// (UltraFront/Quick/Frontend/HotMag/SS-try/BumpShadow), leaving:
// SLL → TLS Magazine → SuperSlab → (remaining slow path)
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#define HAKMEM_TINY_MINIMAL_FRONT 1
#endif
// Strict front: compile-out optional front tiers but keep baseline structure intact
#ifndef HAKMEM_TINY_STRICT_FRONT
#define HAKMEM_TINY_STRICT_FRONT 0
#endif
// Bench-only fast path knobs (defaults)
#ifndef HAKMEM_TINY_BENCH_REFILL
#define HAKMEM_TINY_BENCH_REFILL 8
#endif
// Optional per-class overrides (bench-only)
#ifndef HAKMEM_TINY_BENCH_REFILL8
#define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif
// Bench-only warmup amounts (pre-fill TLS SLL on first alloc per class)
#ifndef HAKMEM_TINY_BENCH_WARMUP8
#define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#define HAKMEM_TINY_BENCH_WARMUP64 192
#endif
#ifdef HAKMEM_TINY_BENCH_FASTPATH
static __thread unsigned char g_tls_bench_warm_done[4];
#endif
#if HAKMEM_DEBUG_COUNTERS
#define HAK_PATHDBG_INC(arr, idx) do { if (g_path_debug_enabled) { (arr)[(idx)]++; } } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (arr)[(idx)]++; } while(0)
#else
#define HAK_PATHDBG_INC(arr, idx) do { (void)(idx); } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (void)(idx); } while(0)
#endif
// Simple scalar debug increment (no-op when HAKMEM_DEBUG_COUNTERS=0)
#if HAKMEM_DEBUG_COUNTERS
#define HAK_DBG_INC(var) do { (var)++; } while(0)
#else
#define HAK_DBG_INC(var) do { (void)0; } while(0)
#endif
// Return helper: record tiny alloc stat (guarded) then return pointer
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);
// ========== HAK_RET_ALLOC: Single Definition Point ==========
// Choose implementation based on HAKMEM_TINY_HEADER_CLASSIDX
// - Phase 7 enabled: Write header and return user pointer
// - Phase 7 disabled: Legacy behavior (stats + route + return)
#if HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions)
#define HAK_RET_ALLOC(cls, base_ptr) do { \
*(uint8_t*)(base_ptr) = HEADER_MAGIC | ((cls) & HEADER_CLASS_MASK); \
return (void*)((uint8_t*)(base_ptr) + 1); \
} while(0)
#else
// Debug: Keep full validation via tiny_region_id_write_header()
#define HAK_RET_ALLOC(cls, ptr) return tiny_region_id_write_header((ptr), (cls))
#endif
#else
// Legacy: Stats and routing before return
#ifdef HAKMEM_ENABLE_STATS
// Optional: enabled in sampling builds. The hot path uses direct inline calls, no indirect branches.
#ifdef HAKMEM_TINY_STAT_SAMPLING
static __thread unsigned g_tls_stat_accum_alloc[TINY_NUM_CLASSES];
static int g_stat_rate_lg = 0; // 0 = record every time, otherwise record once per 2^lg events
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) {
if (__builtin_expect(g_stat_rate_lg == 0, 1)) { stats_record_alloc(cls); return; }
unsigned m = (1u << g_stat_rate_lg) - 1u;
if (((++g_tls_stat_accum_alloc[cls]) & m) == 0u) stats_record_alloc(cls);
}
#else
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) { stats_record_alloc(cls); }
#endif
#define HAK_RET_ALLOC(cls, ptr) do { \
tiny_debug_track_alloc_ret((cls), (ptr)); \
hkm_stat_alloc((cls)); \
ROUTE_COMMIT((cls), 0x7F); \
return (ptr); \
} while(0)
#else
#define HAK_RET_ALLOC(cls, ptr) do { \
tiny_debug_track_alloc_ret((cls), (ptr)); \
ROUTE_COMMIT((cls), 0x7F); \
return (ptr); \
} while(0)
#endif
#endif // HAKMEM_TINY_HEADER_CLASSIDX
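// Sketch of the release-mode header contract above (comment only, not built):
// the byte just before the user pointer carries HEADER_MAGIC | class, so the
// free path can recover the class without a registry lookup. Assuming
// HEADER_MAGIC and HEADER_CLASS_MASK occupy disjoint bits (as the OR encoding
// implies):
//   uint8_t* base = /* block base inside a slab */;
//   base[0] = HEADER_MAGIC | (cls & HEADER_CLASS_MASK);
//   void* user = base + 1;                                   // returned to caller
//   int cls_back = ((uint8_t*)user)[-1] & HEADER_CLASS_MASK; // recovered on free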
// Free-side stats: compile-time zero when stats disabled
#ifdef HAKMEM_ENABLE_STATS
#define HAK_STAT_FREE(cls) do { stats_record_free((cls)); } while(0)
#else
#define HAK_STAT_FREE(cls) do { } while(0)
#endif
// Forward declarations for static helpers used before definition
struct TinySlab; // forward
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
static void release_slab(struct TinySlab* slab);
static TinySlab* allocate_new_slab(int class_idx);
static void tiny_tls_cache_drain(int class_idx);
static void tiny_apply_mem_diet(void);
// Phase 6.23: SuperSlab allocation forward declaration
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
// Forward decl: used by tiny_spec_pop_path before its definition
#if HAKMEM_TINY_P0_BATCH_REFILL
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
#else
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
// Only a single implementation exists there; declare here for callers.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
int sll_refill_small_from_ss(int class_idx, int max_take);
#else
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
#endif
#endif
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
static void tiny_remote_drain_locked(struct TinySlab* slab);
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
/* moved below TinyTLSSlab definition */
// Step 3d: Forced inlining for readability + performance (306M target)
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx);
// Helpers for SuperSlab active block accounting (atomic, saturating dec)
void ss_active_add(SuperSlab* ss, uint32_t n) {
atomic_fetch_add_explicit(&ss->total_active_blocks, n, memory_order_relaxed);
}
static inline __attribute__((always_inline)) void ss_active_inc(SuperSlab* ss) {
atomic_fetch_add_explicit(&ss->total_active_blocks, 1u, memory_order_relaxed);
}
// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)
// Front refill count global config (declare before init.inc uses them)
extern int g_refill_count_global;
extern int g_refill_count_hot;
extern int g_refill_count_mid;
extern int g_refill_count_class[TINY_NUM_CLASSES];
// Step 3d: Forced inlining for slow path (maintain monolithic performance)
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#endif
// ---------------------------------------------------------------------------
// Box: adopt_gate_try (implementation moved from header for robust linkage)
// ---------------------------------------------------------------------------
#include "box/adopt_gate_box.h"
extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
extern unsigned long long g_adopt_gate_calls[];
extern unsigned long long g_adopt_gate_success[];
extern unsigned long long g_reg_scan_attempts[];
extern unsigned long long g_reg_scan_hits[];
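// adopt_gate_try: two-stage adoption before giving up.
//   1) tiny_refill_try_fast(): reuse a recently published partial SuperSlab.
//   2) Bounded registry scan (at most tiny_reg_scan_max() entries), using
//      nonempty_mask / freelist_mask to pick candidate slabs and binding the
//      first one that is safe to own.
// Returns NULL when nothing could be adopted.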
SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
g_adopt_gate_calls[class_idx]++;
ROUTE_MARK(13);
SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
if (ss) { g_adopt_gate_success[class_idx]++; return ss; }
g_reg_scan_attempts[class_idx]++;
int reg_size = g_super_reg_class_size[class_idx];
int scan_limit = tiny_reg_scan_max();
if (scan_limit > reg_size) scan_limit = reg_size;
uint32_t self_tid = tiny_self_u32();
// Local helper (mirror adopt_bind_if_safe) to avoid including alloc inline here
auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
uint32_t self_tid = tiny_self_u32();
SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
if (!slab_is_valid(&h)) return 0;
slab_drain_remote_full(&h);
if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
slab_release(&h);
return 1;
}
slab_release(&h);
return 0;
}
for (int i = 0; i < scan_limit; i++) {
SuperSlab* cand = g_super_reg_by_class[class_idx][i];
if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
// Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
uint32_t mask = cand->nonempty_mask;
// Fallback to atomic freelist_mask for cross-thread visibility
if (mask == 0) {
mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
}
if (mask == 0) continue; // No visible freelists in this SS
int cap = ss_slabs_capacity(cand);
while (mask) {
int sidx = __builtin_ctz(mask);
mask &= (mask - 1);
if (sidx >= cap) continue;
if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
g_adopt_gate_success[class_idx]++;
g_reg_scan_hits[class_idx]++;
ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
return cand;
}
}
}
return NULL;
}
// ============================================================================
// Global State
// ============================================================================
// Global pool instance (extern declared in hakmem_tiny.h)
TinyPool g_tiny_pool;
int g_tiny_initialized = 0; // Not static (extern in header for inline access)
// Runtime toggle: allow Tiny allocations even inside malloc/free wrappers
// Phase 7.3 LESSONS LEARNED: Async optimization (Phase 1+2) FAILED
//
// Results:
// Phase 1 (Push - deferred free): +1 instruction, zero benefit
// Phase 2 (Pull - background refill): +77 instructions, -3% performance
//
// Root cause: Both optimized SLOW PATH (bitmap scan), but benchmark hits FAST PATH 99.9%
// - TLS Magazine capacity: 2048 items
// - Benchmark working set: 100 items
// - Magazine hit rate: 100% after warmup
// - Slow path never executed!
//
// Real bottleneck: FAST PATH (TLS magazine access) = 228 instructions/op
// - glibc: ~40 instructions/op (5-7× faster)
// - Gap is architectural (bitmap vs free-list, research features)
//
// Phase 7.4: getenv fix achieved 86% speedup → now FASTER than glibc!
// Results: 120-164 M ops/sec (vs glibc 105 M ops/sec) = 15-57% faster ✅
// Decision: Enable by default (proven production-ready)
static int g_wrap_tiny_enabled = 1; // ON by default (faster than glibc!)
// Optional: allow limited trylock-based refill during wrapper calls
static int g_wrap_tiny_refill = 0;
// Remote-free drain controls
static int g_remote_drain_thresh = 32; // HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD (global fallback)
static int g_remote_drain_tryrate = 16; // HAKMEM_TINY_REMOTE_DRAIN_TRYRATE (1/N probability)
// ACE Learning Layer: Per-class remote drain thresholds
int g_remote_drain_thresh_per_class[TINY_NUM_CLASSES] = {32, 32, 32, 32, 32, 32, 32, 32};
// Sampled counter updates (Phase 3: Replaced with batched TLS counters)
// Old: XOR RNG sampling (10-15 ns overhead)
// New: Batched stats in hakmem_tiny_stats.h (0.5 ns overhead)
static int g_tiny_count_sample_exp = 8; // HAKMEM_TINY_COUNT_SAMPLE (kept for compatibility)
// Step 2: Slab Registry (Hash Table)
SlabRegistryEntry g_slab_registry[SLAB_REGISTRY_SIZE];
PaddedLock g_tiny_class_locks[TINY_NUM_CLASSES];
// Registry lock
pthread_mutex_t g_tiny_registry_lock = PTHREAD_MUTEX_INITIALIZER;
// Phase 6.14: Runtime toggle for Registry ON/OFF
// O(N) Sequential Access is faster than O(1) Random Access for Small-N (8-32 slabs)
// Reason: L1 cache hit rate 95%+ (Sequential) vs 50-70% (Random Hash)
static int g_use_registry = 1; // Default ON for thread-safety
// TLS Magazines (P1) definitions moved to hakmem_tiny_magazine.h
// Upper bound for one-shot refill from a slab when the magazine is low (runtime tunable)
static int g_tiny_refill_max = 64; // HAKMEM_TINY_REFILL_MAX (default 64)
static int g_tiny_refill_max_hot = 192; // HAKMEM_TINY_REFILL_MAX_HOT for classes<=3 (default 192)
// hakmem_tiny_tls_list.h already included at top
static __thread TinyTLSList g_tls_lists[TINY_NUM_CLASSES];
static int g_tls_list_enable = 0; // Default OFF for bench; override via HAKMEM_TINY_TLS_LIST=1
static inline int tls_refill_from_tls_slab(int class_idx, TinyTLSList* tls, uint32_t want);
static int g_fast_enable = 1;
static int g_fastcache_enable = 1; // Default ON (array stack for C0-C3); override via HAKMEM_TINY_FASTCACHE=0
static uint16_t g_fast_cap[TINY_NUM_CLASSES];
static int g_ultra_bump_shadow = 0; // HAKMEM_TINY_BUMP_SHADOW=1
static uint8_t g_fast_cap_locked[TINY_NUM_CLASSES];
typedef void* (*TinyHotAllocFn)(void);
static TinyHotAllocFn g_hot_alloc_fn[TINY_NUM_CLASSES];
static __thread void* g_fast_head[TINY_NUM_CLASSES];
static __thread uint16_t g_fast_count[TINY_NUM_CLASSES];
static inline void tls_list_spill_excess(int class_idx, TinyTLSList* tls);
uint64_t g_tls_hit_count[TINY_NUM_CLASSES];
uint64_t g_tls_miss_count[TINY_NUM_CLASSES];
uint64_t g_tls_spill_ss_count[TINY_NUM_CLASSES];
uint64_t g_tls_spill_owner_count[TINY_NUM_CLASSES];
uint64_t g_tls_spill_mag_count[TINY_NUM_CLASSES];
uint64_t g_tls_spill_requeue_count[TINY_NUM_CLASSES];
// Legacy magazine definitions have been moved to hakmem_tiny_magazine.h
// NEW: Per-thread active slabs (up to 2 per class)
static __thread TinySlab* g_tls_active_slab_a[TINY_NUM_CLASSES];
static __thread TinySlab* g_tls_active_slab_b[TINY_NUM_CLASSES];
static inline __attribute__((always_inline)) TinySlab* tls_active_owner_for_ptr(int class_idx, void* ptr) {
TinySlab* cand = g_tls_active_slab_a[class_idx];
if (cand) {
uintptr_t base = (uintptr_t)cand->base;
if ((uintptr_t)ptr >= base && (uintptr_t)ptr < base + (uintptr_t)TINY_SLAB_SIZE) {
return cand;
}
}
cand = g_tls_active_slab_b[class_idx];
if (cand) {
uintptr_t base = (uintptr_t)cand->base;
if ((uintptr_t)ptr >= base && (uintptr_t)ptr < base + (uintptr_t)TINY_SLAB_SIZE) {
return cand;
}
}
return NULL;
}
// Phase 6.23: SuperSlab support (mimalloc-style fast allocation)
// Runtime toggle (global, defined in hakmem_config.c). Default is ON for Box Refactor line.
extern int g_use_superslab;
#if !HAKMEM_BUILD_RELEASE
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr) {
if (!ptr) return;
// ✅ Phase E1-CORRECT: ptr is USER pointer, convert to BASE
void* base_ptr = ptr ? (void*)((uint8_t*)ptr - 1) : NULL;
if (g_use_superslab && __builtin_expect(tiny_refill_failfast_level() >= 2, 0)) {
SuperSlab* ss = hak_super_lookup(ptr);
if (!(ss && ss->magic == SUPERSLAB_MAGIC)) {
tiny_failfast_abort_ptr("alloc_ret_lookup", ss, -1, ptr, "lookup_fail");
} else {
int slab_idx = slab_index_for(ss, base_ptr);
if (slab_idx < 0) {
tiny_failfast_abort_ptr("alloc_ret_slabidx", ss, slab_idx, ptr, "slab_idx_mismatch");
} else {
// Fail-Fast: class vs SuperSlab size_class must be consistent.
TinySlabMeta* meta = &ss->slabs[slab_idx];
if (meta->class_idx != (uint8_t)cls) {
tiny_failfast_abort_ptr("alloc_ret_cls_mismatch", ss, slab_idx, ptr, "class_mismatch");
}
size_t blk = g_tiny_class_sizes[cls];
uintptr_t base = (uintptr_t)tiny_slab_base_for(ss, slab_idx);
uintptr_t delta = (uintptr_t)base_ptr - base;
if (blk == 0 || (delta % blk) != 0) {
tiny_failfast_abort_ptr("alloc_ret_align", ss, slab_idx, ptr, "misaligned");
} else if (delta / blk >= ss->slabs[slab_idx].capacity) {
tiny_failfast_abort_ptr("alloc_ret_range", ss, slab_idx, ptr, "out_of_capacity");
}
}
}
}
if (!__builtin_expect(g_debug_remote_guard, 0)) return;
if (!g_use_superslab) return;
SuperSlab* ss = hak_super_lookup(ptr);
if (!(ss && ss->magic == SUPERSLAB_MAGIC)) return;
int slab_idx = slab_index_for(ss, base_ptr);
if (slab_idx >= 0) {
tiny_remote_track_on_alloc(ss, slab_idx, ptr, "alloc_ret", 0);
}
}
#else
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr) { (void)cls; (void)ptr; }
#endif
// Debug counters for SuperSlab investigation
#if HAKMEM_DEBUG_COUNTERS
int g_superslab_alloc_count = 0;
int g_superslab_fail_count = 0;
int g_superslab_free_count = 0; // Phase 7.6: Track SuperSlab frees
int g_empty_superslab_count = 0; // Phase 7.6: Track empty SuperSlabs detected
int g_magazine_push_count = 0; // Phase 7.6: Track Magazine pushes
int g_tiny_free_with_slab_count = 0; // Phase 7.6: Track tiny_free_with_slab calls
#endif
// Phase 7.6: Deferred deallocation - keep some empty SuperSlabs as reserve
// Phase 8.1: Reduced from 2 to 1 (-2 MB overhead, minimal performance impact)
// Phase 8.2: Testing Reserve 0 vs 1 (benchmarking in progress)
#define EMPTY_SUPERSLAB_RESERVE 0 // Keep up to N empty SuperSlabs per class (default)
static SuperSlab* g_empty_superslabs[TINY_NUM_CLASSES]; // One empty SuperSlab per class
static int g_empty_counts[TINY_NUM_CLASSES] = {0}; // Count of empty SuperSlabs
static int g_empty_reserve = -1; // Env: HAKMEM_TINY_SS_RESERVE (default=1)
static pthread_mutex_t g_empty_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_ss_partial_enable = 1; // Enable partial SuperSlab release by default
static uint32_t g_ss_partial_interval = 4;
static _Atomic uint32_t g_ss_partial_epoch = 0;
// Phase 6.24: Unified TLS slab cache (Medium fix)
// Reduces TLS reads from 3 to 1 (cache-line aligned for performance)
__thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_cap[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_refill[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_target_spill[TINY_NUM_CLASSES];
static _Atomic uint64_t g_tls_trim_epoch[TINY_NUM_CLASSES];
static _Atomic uint32_t g_tls_param_seq[TINY_NUM_CLASSES];
static __thread uint32_t g_tls_param_seen[TINY_NUM_CLASSES];
static __thread uint64_t g_tls_trim_seen[TINY_NUM_CLASSES];
// ----------------------------------------------------------------------------
// Per-class partial SuperSlab slot (single-slot publish/adopt)
// ----------------------------------------------------------------------------
// Small ring of partial SuperSlabs per class (publish/adopt)
#ifndef SS_PARTIAL_RING
#define SS_PARTIAL_RING 64
#endif
static _Atomic(SuperSlab*) g_ss_partial_ring[TINY_NUM_CLASSES][SS_PARTIAL_RING];
static _Atomic(uint32_t) g_ss_partial_rr[TINY_NUM_CLASSES];
static _Atomic(SuperSlab*) g_ss_partial_over[TINY_NUM_CLASSES];
static __thread int g_tls_adopt_cd[TINY_NUM_CLASSES];
static int g_adopt_cool_period = -1; // env: HAKMEM_TINY_SS_ADOPT_COOLDOWN
// Debug counters (per class): publish/adopt hits (visible when HAKMEM_DEBUG_COUNTERS)
unsigned long long g_ss_publish_dbg[TINY_NUM_CLASSES] = {0};
unsigned long long g_ss_adopt_dbg[TINY_NUM_CLASSES] = {0};
_Atomic int g_ss_remote_seen = 0; // becomes 1 when any remote free occurs
static int g_ss_adopt_env = -2; // -2=unparsed, -1=forced OFF, 0=auto, 1=forced ON
static _Atomic int g_ss_adopt_runtime = 0; // 0=inactive, 1=active
static _Atomic int g_ss_adopt_log_once = 0;
static void tiny_adopt_gate_log_activation(const char* reason, int class_idx) {
if (atomic_exchange_explicit(&g_ss_adopt_log_once, 1, memory_order_acq_rel) == 0) {
fprintf(stderr, "[ADOPT_GATE] activated (reason=%s class=%d)\n",
reason ? reason : "unknown", class_idx);
}
}
static inline void tiny_adopt_gate_parse_env(void) {
if (__builtin_expect(g_ss_adopt_env == -2, 0)) {
const char* env = getenv("HAKMEM_TINY_SS_ADOPT");
if (!env || *env == '\0') {
g_ss_adopt_env = 0; // auto
} else if (*env == '0') {
g_ss_adopt_env = -1; // forced OFF
atomic_store_explicit(&g_ss_adopt_runtime, 0, memory_order_release);
} else {
g_ss_adopt_env = 1; // forced ON
atomic_store_explicit(&g_ss_adopt_runtime, 1, memory_order_release);
tiny_adopt_gate_log_activation("env", -1);
}
}
}
int tiny_adopt_gate_should_publish(void) {
tiny_adopt_gate_parse_env();
if (g_ss_adopt_env == 1) return 1;
if (g_ss_adopt_env == -1) return 0;
return atomic_load_explicit(&g_ss_adopt_runtime, memory_order_acquire) != 0;
}
int tiny_adopt_gate_should_adopt(void) {
tiny_adopt_gate_parse_env();
if (g_ss_adopt_env == 1) return 1;
if (g_ss_adopt_env == -1) return 0;
return atomic_load_explicit(&g_ss_adopt_runtime, memory_order_acquire) != 0;
}
void tiny_adopt_gate_on_remote_seen(int class_idx) {
tiny_adopt_gate_parse_env();
atomic_store_explicit(&g_ss_remote_seen, 1, memory_order_relaxed);
if (g_ss_adopt_env == -1) return;
int prev = atomic_exchange_explicit(&g_ss_adopt_runtime, 1, memory_order_acq_rel);
if (prev == 0) {
tiny_adopt_gate_log_activation("remote", class_idx);
}
}
// TLS hint: last adopted SuperSlab/slab to avoid rescans
#include "tiny_sticky.h"
// Mailbox box
#include "box/mailbox_box.h"
// Publish pipeline counters (visibility)
unsigned long long g_pub_notify_calls[TINY_NUM_CLASSES] = {0};
unsigned long long g_pub_same_empty[TINY_NUM_CLASSES] = {0};
unsigned long long g_remote_transitions[TINY_NUM_CLASSES] = {0};
unsigned long long g_mailbox_register_calls[TINY_NUM_CLASSES] = {0};
unsigned long long g_mailbox_slow_discoveries[TINY_NUM_CLASSES] = {0};
// Slab-ring counters (debug)
unsigned long long g_slab_publish_dbg[TINY_NUM_CLASSES] = {0};
unsigned long long g_slab_adopt_dbg[TINY_NUM_CLASSES] = {0};
unsigned long long g_slab_requeue_dbg[TINY_NUM_CLASSES] = {0};
unsigned long long g_slab_miss_dbg[TINY_NUM_CLASSES] = {0};
// Slab entry encoding helpers (used by Bench/Slab-ring paths)
static inline uintptr_t slab_entry_make(SuperSlab* ss, int slab_idx) {
return ((uintptr_t)ss) | ((uintptr_t)slab_idx & 0x3Fu);
}
static inline SuperSlab* slab_entry_ss(uintptr_t ent) {
// SuperSlab is aligned to at least 1MB; clear low 1MB bits to recover base
return (SuperSlab*)(ent & ~((uintptr_t)SUPERSLAB_SIZE_MIN - 1u));
}
static inline int slab_entry_idx(uintptr_t ent) {
return (int)(ent & 0x3Fu);
}
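// Encoding note: slab_entry_make() relies on SuperSlab bases being aligned to
// at least SUPERSLAB_SIZE_MIN, so the low bits are free to carry the slab
// index (0..63). Illustrative round-trip:
//   uintptr_t e = slab_entry_make(ss, 5); // base | 5
//   slab_entry_ss(e);                     // == ss (low alignment bits cleared)
//   slab_entry_idx(e);                    // == 5  (low 6 bits)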
// ----------------------------------------------------------------------------
// Bench Mode Publish Mailbox (single-slot per class)
// ----------------------------------------------------------------------------
static int g_bench_mode = -1; // env: HAKMEM_TINY_BENCH_MODE=1
static _Atomic(uintptr_t) g_bench_mailbox_rr[TINY_NUM_CLASSES];
#ifndef BENCH_MAILBOX_WIDTH
#define BENCH_MAILBOX_WIDTH 16
#endif
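// NOTE: BENCH_MAILBOX_WIDTH should be a power of two; bench_pub_push() below
// masks the round-robin counter with (BENCH_MAILBOX_WIDTH - 1) rather than
// taking a modulo, so a non-power-of-two width would leave some slots unused.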
static _Atomic(uintptr_t) g_bench_mailbox[TINY_NUM_CLASSES][BENCH_MAILBOX_WIDTH];
static inline int bench_mode_enabled(void) {
if (__builtin_expect(g_bench_mode == -1, 0)) {
const char* b = getenv("HAKMEM_TINY_BENCH_MODE");
g_bench_mode = (b && atoi(b) != 0) ? 1 : 0;
}
return g_bench_mode;
}
static inline void bench_pub_push(int class_idx, SuperSlab* ss, int slab_idx) {
if (!bench_mode_enabled()) return;
uintptr_t ent = slab_entry_make(ss, slab_idx);
uint32_t idx = atomic_fetch_add_explicit(&g_bench_mailbox_rr[class_idx], 1u, memory_order_relaxed);
idx &= (BENCH_MAILBOX_WIDTH - 1);
atomic_store_explicit(&g_bench_mailbox[class_idx][idx], ent, memory_order_release);
}
static inline uintptr_t bench_pub_pop(int class_idx) {
if (!bench_mode_enabled()) return (uintptr_t)0;
for (int i = 0; i < BENCH_MAILBOX_WIDTH; i++) {
uintptr_t ent = atomic_exchange_explicit(&g_bench_mailbox[class_idx][i], (uintptr_t)0, memory_order_acq_rel);
if (ent) return ent;
}
return 0;
}
// ----------------------------------------------------------------------------
// Slab-Granular Partial Publish/Adopt (encoded entries)
// ----------------------------------------------------------------------------
#ifndef SLAB_PARTIAL_RING
#define SLAB_PARTIAL_RING 128
#endif
static _Atomic(uintptr_t) g_slab_partial_ring[TINY_NUM_CLASSES][SLAB_PARTIAL_RING];
static _Atomic(uint32_t) g_slab_partial_rr2[TINY_NUM_CLASSES];
// ----------------------------------------------------------------------------
// Refill-stage counters (per class)
// ----------------------------------------------------------------------------
unsigned long long g_rf_total_calls[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_bench[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_hot[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_ready[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_mail[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_slab[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_ss[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_hit_reg[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_mmap_calls[TINY_NUM_CLASSES] = {0};
// Diagnostic: refill early return counters (to debug why g_rf_hit_slab is 0)
unsigned long long g_rf_early_no_ss[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_early_no_meta[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_early_no_room[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_early_want_zero[TINY_NUM_CLASSES] = {0};
// Refill timing (ns) per class and per stage (env: HAKMEM_TINY_RF_TRACE)
unsigned long long g_rf_time_total_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_hot_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_bench_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_mail_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_slab_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_ss_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_reg_ns[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_time_mmap_ns[TINY_NUM_CLASSES] = {0};
// Refill item source breakdown (freelist vs carve)
unsigned long long g_rf_freelist_items[TINY_NUM_CLASSES] = {0};
unsigned long long g_rf_carve_items[TINY_NUM_CLASSES] = {0};
static int g_rf_trace_en = -1;
static inline int rf_trace_enabled(void) {
if (__builtin_expect(g_rf_trace_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_RF_TRACE");
g_rf_trace_en = (e && atoi(e) != 0) ? 1 : 0;
}
return g_rf_trace_en;
}
static inline unsigned long long rf_now_ns(void) {
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (unsigned long long)ts.tv_sec * 1000000000ull + (unsigned long long)ts.tv_nsec;
}
// moved to tiny_sticky.c
// moved to tiny_remote.c
// moved to tiny_mailbox.c
// Publish-side counters (debug)
unsigned long long g_pub_bench_hits[TINY_NUM_CLASSES] = {0};
unsigned long long g_pub_hot_hits[TINY_NUM_CLASSES] = {0};
unsigned long long g_pub_mail_hits[TINY_NUM_CLASSES] = {0};
// Free pipeline counters
unsigned long long g_free_via_ss_local[TINY_NUM_CLASSES] = {0};
unsigned long long g_free_via_ss_remote[TINY_NUM_CLASSES] = {0};
unsigned long long g_free_via_tls_sll[TINY_NUM_CLASSES] = {0};
unsigned long long g_free_via_mag[TINY_NUM_CLASSES] = {0};
// Front Gate Breakdown (debug counters)
unsigned long long g_front_sfc_hit[TINY_NUM_CLASSES] = {0};
unsigned long long g_front_sll_hit[TINY_NUM_CLASSES] = {0};
unsigned long long g_front_quick_hit[TINY_NUM_CLASSES] = {0};
unsigned long long g_front_mag_hit[TINY_NUM_CLASSES] = {0};
// Free-side trigger counters
unsigned long long g_first_free_transitions[TINY_NUM_CLASSES] = {0};
unsigned long long g_remote_free_transitions[TINY_NUM_CLASSES] = {0};
// Adopt/Registry gate counters
unsigned long long g_adopt_gate_calls[TINY_NUM_CLASSES] = {0};
unsigned long long g_adopt_gate_success[TINY_NUM_CLASSES] = {0};
unsigned long long g_reg_scan_attempts[TINY_NUM_CLASSES] = {0};
unsigned long long g_reg_scan_hits[TINY_NUM_CLASSES] = {0};
unsigned long long g_free_via_fast_tls[TINY_NUM_CLASSES] = {0};
unsigned long long g_free_via_fastcache[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_flush[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_hits[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_full[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_disabled[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_zero_cap[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_gate_disabled[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_push_gate_zero_cap[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_attempts[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_disabled[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_empty[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_lookup_fail[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_spare_bad_index[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_lookup_ss[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_lookup_slab[TINY_NUM_CLASSES] = {0};
unsigned long long g_fast_lookup_none = 0;
// ----------------------------------------------------------------------------
// Live Superslab cap (must-adopt-before-mmap support)
// ----------------------------------------------------------------------------
static int g_live_cap_env = -2; // -2=unparsed, -1=disabled, >=0=cap value
__thread int g_tls_live_ss[TINY_NUM_CLASSES] = {0};
static inline int live_cap_for_class(int class_idx) {
if (__builtin_expect(g_live_cap_env == -2, 0)) {
const char* s = getenv("HAKMEM_SS_LIVE_CAP");
if (!s) g_live_cap_env = -1; else { int v = atoi(s); g_live_cap_env = (v>0? v : -1); }
}
(void)class_idx;
return g_live_cap_env;
}
// ----------------------------------------------------------------------------
// Hot Slot (global simple path)
// ----------------------------------------------------------------------------
static int g_hot_slot_en = -1; // env: HAKMEM_HOT_SLOT=1 (bench mode implies hot slot)
static _Atomic(uintptr_t) g_hot_slot[TINY_NUM_CLASSES];
static inline int hot_slot_enabled(void) {
if (__builtin_expect(g_hot_slot_en == -1, 0)) {
const char* s = getenv("HAKMEM_HOT_SLOT");
g_hot_slot_en = (s && atoi(s) != 0) ? 1 : 0;
}
return g_hot_slot_en || bench_mode_enabled();
}
static inline void hot_slot_push(int class_idx, SuperSlab* ss, int slab_idx) {
if (!hot_slot_enabled()) return;
uintptr_t ent = slab_entry_make(ss, slab_idx);
atomic_exchange_explicit(&g_hot_slot[class_idx], ent, memory_order_release);
}
static inline uintptr_t hot_slot_pop(int class_idx) {
if (!hot_slot_enabled()) return (uintptr_t)0;
return atomic_exchange_explicit(&g_hot_slot[class_idx], (uintptr_t)0, memory_order_acq_rel);
}
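// Hot-slot semantics: one encoded slab entry per class, handed off by atomic
// exchange. hot_slot_push() unconditionally overwrites the previous entry
// (which is dropped, not requeued), and hot_slot_pop() swaps in 0, so at most
// one thread can adopt a given entry.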
// moved to tiny_publish.c
static __attribute__((unused)) void slab_partial_publish(int class_idx, SuperSlab* ss, int slab_idx) {
if (!ss) return;
uintptr_t ent = slab_entry_make(ss, slab_idx);
for (int i = 0; i < SLAB_PARTIAL_RING; i++) {
uintptr_t expected = 0;
if (atomic_compare_exchange_strong_explicit(&g_slab_partial_ring[class_idx][i], &expected, ent,
memory_order_release, memory_order_relaxed)) {
g_slab_publish_dbg[class_idx]++;
return;
}
}
// Ring full: round-robin replace and try to requeue the displaced entry into another empty slot
uint32_t idx = atomic_fetch_add_explicit(&g_slab_partial_rr2[class_idx], 1u, memory_order_relaxed) % SLAB_PARTIAL_RING;
uintptr_t old = atomic_exchange_explicit(&g_slab_partial_ring[class_idx][idx], ent, memory_order_acq_rel);
if (old) {
for (int t = 0; t < 8; t++) {
uint32_t j = atomic_fetch_add_explicit(&g_slab_partial_rr2[class_idx], 1u, memory_order_relaxed) % SLAB_PARTIAL_RING;
uintptr_t expected = 0;
if (atomic_compare_exchange_weak_explicit(&g_slab_partial_ring[class_idx][j], &expected, old,
memory_order_release, memory_order_relaxed)) {
g_slab_requeue_dbg[class_idx]++;
old = 0; break;
}
}
}
g_slab_publish_dbg[class_idx]++;
}
static __attribute__((unused)) uintptr_t slab_partial_adopt(int class_idx) {
for (int i = 0; i < SLAB_PARTIAL_RING; i++) {
uintptr_t ent = atomic_exchange_explicit(&g_slab_partial_ring[class_idx][i], (uintptr_t)0, memory_order_acq_rel);
if (ent) return ent;
}
return 0;
}
void ss_partial_publish(int class_idx, SuperSlab* ss) {
if (!ss) return;
// Gate by listed flag to avoid repeated publishes of the same SS
unsigned prev = atomic_exchange_explicit(&ss->listed, 1u, memory_order_acq_rel);
if (prev != 0u) return; // already listed
// CRITICAL: Release ownership of all slabs so adopters can claim them!
// Without this, ss_owner_try_acquire() will fail (requires owner_tid==0).
// The publishing thread must stop using this SS after publishing.
int cap_pub = ss_slabs_capacity(ss);
for (int s = 0; s < cap_pub; s++) {
uint8_t prev = __atomic_exchange_n(&ss->slabs[s].owner_tid_low, 0u, __ATOMIC_RELEASE);
if (__builtin_expect(g_debug_remote_guard && prev != 0u, 0)) {
uintptr_t aux = ((uintptr_t)s << 32) | (uintptr_t)prev;
tiny_debug_ring_record(TINY_RING_EVENT_OWNER_RELEASE,
(uint16_t)ss->slabs[s].class_idx,
&ss->slabs[s],
aux);
}
}
// CRITICAL: Unbind current thread's TLS if it points to this SS!
// Otherwise, the publishing thread will continue allocating from the published SS,
// racing with adopters who acquire ownership.
extern __thread TinyTLSSlab g_tls_slabs[];
if (g_tls_slabs[class_idx].ss == ss) {
g_tls_slabs[class_idx].ss = NULL;
g_tls_slabs[class_idx].meta = NULL;
g_tls_slabs[class_idx].slab_base = NULL;
g_tls_slabs[class_idx].slab_idx = 0;
}
// Compute a quick best-slab hint for adopters (prefer slabs marked slab_listed=1)
int best = -1; uint32_t best_score = 0;
for (int s = 0; s < cap_pub; s++) {
TinySlabMeta* m = &ss->slabs[s];
uint32_t rc = atomic_load_explicit(&ss->remote_counts[s], memory_order_relaxed);
int has_remote = (atomic_load_explicit(&ss->remote_heads[s], memory_order_acquire) != 0);
unsigned listed = atomic_load_explicit(&ss->slab_listed[s], memory_order_relaxed) ? 1u : 0u;
uint32_t score = rc
+ (m->freelist ? (1u<<30) : 0u)
+ (listed ? (1u<<29) : 0u)
+ (has_remote ? 1u : 0u);
if (score > best_score) { best_score = score; best = s; }
}
if (best >= 0 && best < 256) {
ss->publish_hint = (uint8_t)best;
// Box: Ready push — provide slab-level candidate to adopters
tiny_ready_push(class_idx, ss, best);
} else {
ss->publish_hint = 0xFF;
}
for (int i = 0; i < SS_PARTIAL_RING; i++) {
SuperSlab* expected = NULL;
if (atomic_compare_exchange_strong_explicit(&g_ss_partial_ring[class_idx][i], &expected, ss,
memory_order_release, memory_order_relaxed)) {
g_ss_publish_dbg[class_idx]++;
return; // published
}
}
// Ring full: replace one entry in round-robin to avoid dropping supply
uint32_t idx = atomic_fetch_add_explicit(&g_ss_partial_rr[class_idx], 1u, memory_order_relaxed);
idx %= SS_PARTIAL_RING;
SuperSlab* old = atomic_exchange_explicit(&g_ss_partial_ring[class_idx][idx], ss, memory_order_acq_rel);
if (old) {
// NOTE: Do NOT drain here! The old SuperSlab may have slabs owned by other threads
// that just adopted from it. Draining without ownership checks causes freelist corruption.
// The adopter will drain when needed (with proper ownership checks in tiny_refill.h).
//
// Previous code (UNSAFE):
// for (int s = 0; s < cap; s++) {
// ss_remote_drain_to_freelist(old, s); // ← Race with concurrent adopter!
// }
// Keep listed=1 while in overflow so it stays eligible for adopt
// Push old into the overflow stack (waiting box)
SuperSlab* head;
do {
head = atomic_load_explicit(&g_ss_partial_over[class_idx], memory_order_acquire);
old->partial_next = head;
} while (!atomic_compare_exchange_weak_explicit(&g_ss_partial_over[class_idx], &head, old,
memory_order_release, memory_order_relaxed));
}
g_ss_publish_dbg[class_idx]++;
}
SuperSlab* ss_partial_adopt(int class_idx) {
for (int i = 0; i < SS_PARTIAL_RING; i++) {
SuperSlab* ss = atomic_exchange_explicit(&g_ss_partial_ring[class_idx][i], NULL, memory_order_acq_rel);
if (ss) {
// Clear listed flag on adopt to allow future publish of this SS
atomic_store_explicit(&ss->listed, 0u, memory_order_release);
g_ss_adopt_dbg[class_idx]++;
return ss;
}
}
// Fallback: adopt from overflow stack (LIFO)
while (1) {
SuperSlab* head = atomic_load_explicit(&g_ss_partial_over[class_idx], memory_order_acquire);
if (!head) break;
SuperSlab* next = head->partial_next;
if (atomic_compare_exchange_weak_explicit(&g_ss_partial_over[class_idx], &head, next,
memory_order_acq_rel, memory_order_relaxed)) {
atomic_store_explicit(&head->listed, 0u, memory_order_release);
g_ss_adopt_dbg[class_idx]++;
return head;
}
}
return NULL;
}
static inline void tiny_tls_bind_slab(TinyTLSSlab* tls, SuperSlab* ss, int slab_idx) {
// Canonical binding under Phase 12:
// - Per-slab TinySlabMeta.class_idx defines class for this slab
// - slab_idx is the owning slab index within ss
// - slab_base is ALWAYS derived from tiny_slab_base_for(ss, slab_idx)
tls->ss = ss;
tls->slab_idx = (uint8_t)slab_idx;
tls->meta = &ss->slabs[slab_idx];
tls->slab_base = tiny_slab_base_for(ss, slab_idx);
}
static inline uint32_t tiny_tls_default_refill(uint32_t cap) {
if (cap == 0u) return 8u;
uint32_t low = (cap >= 32u) ? (cap / 4u) : 8u;
if (low < 4u) low = 4u;
return low;
}
static inline uint32_t tiny_tls_default_spill(uint32_t cap) {
if (cap == 0u) return 0u;
uint64_t spill = (uint64_t)cap + (uint64_t)(cap / 2u);
if (spill > TINY_TLS_MAG_CAP) spill = TINY_TLS_MAG_CAP;
if (spill < cap) spill = cap;
return (uint32_t)spill;
}
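// Worked example (illustrative): with cap = 128, tiny_tls_default_refill()
// returns 128/4 = 32 and tiny_tls_default_spill() returns 128 + 64 = 192
// (clamped to TINY_TLS_MAG_CAP), i.e. refill when the list runs low and spill
// once it grows to roughly 1.5x the target capacity.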
static inline void tiny_tls_publish_targets(int class_idx, uint32_t cap) {
atomic_store_explicit(&g_tls_target_cap[class_idx], cap, memory_order_release);
atomic_store_explicit(&g_tls_target_refill[class_idx], tiny_tls_default_refill(cap), memory_order_relaxed);
atomic_store_explicit(&g_tls_target_spill[class_idx], tiny_tls_default_spill(cap), memory_order_relaxed);
atomic_fetch_add_explicit(&g_tls_param_seq[class_idx], 1u, memory_order_release);
}
static inline void tiny_tls_request_trim(int class_idx, uint64_t epoch) {
atomic_store_explicit(&g_tls_trim_epoch[class_idx], epoch, memory_order_release);
atomic_fetch_add_explicit(&g_tls_param_seq[class_idx], 1u, memory_order_release);
}
static inline void tiny_tls_refresh_params(int class_idx, TinyTLSList* tls) {
uint32_t seq = atomic_load_explicit(&g_tls_param_seq[class_idx], memory_order_acquire);
if (__builtin_expect(seq == g_tls_param_seen[class_idx], 1)) {
return;
}
uint32_t target_cap = atomic_load_explicit(&g_tls_target_cap[class_idx], memory_order_acquire);
if (target_cap != 0u && tls->cap != target_cap) {
tls->cap = target_cap;
uint32_t target_refill = atomic_load_explicit(&g_tls_target_refill[class_idx], memory_order_relaxed);
if (target_refill == 0u) target_refill = tiny_tls_default_refill(target_cap);
tls->refill_low = target_refill;
uint32_t target_spill = atomic_load_explicit(&g_tls_target_spill[class_idx], memory_order_relaxed);
if (target_spill < target_cap) target_spill = target_cap;
tls->spill_high = target_spill;
}
uint64_t trim_epoch = atomic_load_explicit(&g_tls_trim_epoch[class_idx], memory_order_acquire);
if (trim_epoch != 0u && g_tls_trim_seen[class_idx] != trim_epoch) {
g_tls_trim_seen[class_idx] = trim_epoch;
if (tls->count > tls->cap) {
tls_list_spill_excess(class_idx, tls);
}
}
g_tls_param_seen[class_idx] = seq;
}
// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
static inline void* tiny_fast_pop(int class_idx);
static inline int tiny_fast_push(int class_idx, void* ptr);
static inline void* fastcache_pop(int class_idx);
static inline int fastcache_push(int class_idx, void* ptr);
// ============================================================================
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
// 88 lines (lines 407-494)
static __attribute__((cold, noinline, unused)) void* tiny_slow_alloc_fast(int class_idx) {
int tls_enabled = g_tls_list_enable;
TinyTLSList* tls = &g_tls_lists[class_idx];
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
TinySlab* slab = g_tiny_pool.free_slabs[class_idx];
if (slab) {
g_tiny_pool.free_slabs[class_idx] = slab->next;
} else {
slab = allocate_new_slab(class_idx);
if (!slab) {
pthread_mutex_unlock(lock);
return NULL;
}
}
slab->next = NULL;
if (atomic_load_explicit(&slab->remote_head, memory_order_acquire)) {
tiny_remote_drain_locked(slab);
}
int block_idx = hak_tiny_find_free_block(slab);
if (block_idx < 0) {
slab->next = g_tiny_pool.free_slabs[class_idx];
g_tiny_pool.free_slabs[class_idx] = slab;
pthread_mutex_unlock(lock);
return NULL;
}
hak_tiny_set_used(slab, block_idx);
slab->free_count--;
size_t block_size = g_tiny_class_sizes[class_idx];
uint8_t* base = (uint8_t*)slab->base;
void* ret = (void*)(base + ((size_t)block_idx * block_size));
g_tiny_pool.alloc_count[class_idx]++;
uint16_t cap = g_fast_cap_defaults[class_idx];
uint16_t count = g_fast_count[class_idx];
uint16_t fast_need = (cap > count) ? (uint16_t)(cap - count) : 0;
if (fast_need > slab->free_count) fast_need = (uint16_t)slab->free_count;
uint32_t tls_need = 0;
if (tls_enabled && tls_list_needs_refill(tls)) {
uint32_t target = tls_list_refill_threshold(tls);
if (tls->count < target) {
tls_need = target - tls->count;
}
}
uint32_t remaining = slab->free_count;
if (fast_need > remaining) fast_need = (uint16_t)remaining;
remaining -= fast_need;
if (tls_need > remaining) tls_need = remaining;
while (fast_need > 0) {
int extra_idx = hak_tiny_find_free_block(slab);
if (extra_idx < 0) break;
hak_tiny_set_used(slab, extra_idx);
slab->free_count--;
void* extra = (void*)(base + ((size_t)extra_idx * block_size));
int pushed = 0;
if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) {
pushed = fastcache_push(class_idx, extra);
} else {
pushed = tiny_fast_push(class_idx, extra);
}
if (!pushed) {
if (tls_enabled) {
tiny_tls_list_guard_push(class_idx, tls, extra);
tls_list_push(tls, extra, class_idx);
}
}
fast_need--;
}
while (tls_enabled && tls_need > 0) {
int extra_idx = hak_tiny_find_free_block(slab);
if (extra_idx < 0) break;
hak_tiny_set_used(slab, extra_idx);
slab->free_count--;
void* extra = (void*)(base + ((size_t)extra_idx * block_size));
tiny_tls_list_guard_push(class_idx, tls, extra);
tls_list_push(tls, extra, class_idx);
tls_need--;
}
if (slab->free_count == 0) {
move_to_full_list(class_idx, slab);
} else {
slab->next = g_tiny_pool.free_slabs[class_idx];
g_tiny_pool.free_slabs[class_idx] = slab;
}
pthread_mutex_unlock(lock);
return ret;
}
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)
// Hot-path cheap sampling counter to avoid rand() in allocation path
// Phase 9.4: TLS single-linked freelist (mimalloc-inspired) for hottest classes (≤128B/≤256B)
int g_tls_sll_enable = 1; // HAKMEM_TINY_TLS_SLL=0 to disable
// Phase 6-1.7: Export TLS variables for box refactor (Box 5/6 need access from hakmem.c)
// CRITICAL FIX: Explicit initializers prevent SEGV from uninitialized TLS in worker threads
// PRIORITY 3: TLS Canaries - Add canaries around TLS arrays to detect buffer overruns
#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL
__thread uint64_t g_tls_canary_before_sll_head = TLS_CANARY_MAGIC;
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__thread void* g_tls_sll_head[TINY_NUM_CLASSES] = {0};
#else
static __thread void* g_tls_sll_head[TINY_NUM_CLASSES] = {0};
#endif
__thread uint64_t g_tls_canary_after_sll_head = TLS_CANARY_MAGIC;
__thread uint64_t g_tls_canary_before_sll_count = TLS_CANARY_MAGIC;
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
__thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES] = {0};
#else
static __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES] = {0};
#endif
__thread uint64_t g_tls_canary_after_sll_count = TLS_CANARY_MAGIC;
static int g_tiny_ultra = 0; // HAKMEM_TINY_ULTRA=1 for SLL-only ultra mode
static int g_ultra_validate = 0; // HAKMEM_TINY_ULTRA_VALIDATE=1 to enable per-pop validation
// Ultra debug counters
#if HAKMEM_DEBUG_COUNTERS
static __attribute__((unused)) uint64_t g_ultra_pop_hits[TINY_NUM_CLASSES] = {0};
static uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_ultra_resets[TINY_NUM_CLASSES] = {0};
#endif
// Path counters (normal mode visibility): lightweight, for debugging/bench only
#if HAKMEM_DEBUG_COUNTERS
static __attribute__((unused)) uint64_t g_path_sll_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_mag_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_front_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_superslab[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_refill_calls[TINY_NUM_CLASSES] = {0};
// New: slow/bitmap/bump/bin instrumentation
static __attribute__((unused)) uint64_t g_alloc_slow_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_superslab_refill_calls_dbg[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bitmap_scan_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bgbin_pops[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bump_hits[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bump_arms[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_spec_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_spec_hits[TINY_NUM_CLASSES] = {0};
#endif
static int g_path_debug_enabled = 0;
// Spill hysteresis (getenv removed from the free hot path)
static int g_spill_hyst = 32; // default margin (configured at init; never getenv on hot path)
// Optional per-class refill batch overrides (0=use global defaults)
static int g_refill_max_c[TINY_NUM_CLASSES] = {0};
static int g_refill_max_hot_c[TINY_NUM_CLASSES] = {0};
static inline __attribute__((always_inline)) int tiny_refill_max_for_class(int class_idx) {
int v = g_refill_max_c[class_idx];
if (v > 0) return v;
if (class_idx <= 3) {
int hv = g_refill_max_hot_c[class_idx];
if (hv > 0) return hv;
return g_tiny_refill_max_hot;
}
return g_tiny_refill_max;
}
// Phase 9.5: Frontend/Backend split - Tiny FastCache (array stack)
// Enabled via HAKMEM_TINY_FASTCACHE=1 (default: 0)
// Compile-out: define HAKMEM_TINY_NO_FRONT_CACHE=1 to exclude this path
#define TINY_FASTCACHE_CAP 128
typedef struct __attribute__((aligned(64))) {
void* items[TINY_FASTCACHE_CAP];
int top;
int _pad[15];
} TinyFastCache;
static __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend)
// SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER)
int g_sll_multiplier = 2;
// Cached thread id (uint32) to avoid repeated pthread_self() in hot paths
static __thread uint32_t g_tls_tid32;
static __thread int g_tls_tid32_inited;
// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
#else
static inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
#endif
if (__builtin_expect(!g_tls_tid32_inited, 0)) {
g_tls_tid32 = (uint32_t)(uintptr_t)pthread_self();
g_tls_tid32_inited = 1;
}
return g_tls_tid32;
}
// Cached pthread_t as-is for APIs that require pthread_t comparison
static __thread pthread_t g_tls_pt_self;
static __thread int g_tls_pt_inited;
// Frontend FastCache hit/miss counters (Small diagnostics)
unsigned long long g_front_fc_hit[TINY_NUM_CLASSES] = {0};
unsigned long long g_front_fc_miss[TINY_NUM_CLASSES] = {0};
// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
#else
static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
#endif
if (__builtin_expect(!g_tls_pt_inited, 0)) {
g_tls_pt_self = pthread_self();
g_tls_pt_inited = 1;
}
return g_tls_pt_self;
}
#include "tiny_refill.h"
// tiny_mmap_gate.h already included at top
#include "tiny_publish.h"
int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // LEGACY (compatibility-only dummy; not referenced from Phase 12 onward)
// Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1)
static int g_tiny_prefetch = 0;
// Small-class magazine pre-initialization (to avoid cap==0 checks on hot path)
// Hot-class small TLS magazine: storage and enable switch
typedef struct {
void* slots[128];
uint16_t top; // 0..128
uint16_t cap; // =128
} TinyHotMag;
static int g_hotmag_cap_default = 128; // default capacity (env override)
static int g_hotmag_refill_default = 32; // default refill batch (env override)
static int g_hotmag_enable = 0; // Default OFF (for A/B testing); can be enabled via env.
static uint16_t g_hotmag_cap_current[TINY_NUM_CLASSES];
static uint8_t g_hotmag_cap_locked[TINY_NUM_CLASSES];
static uint16_t g_hotmag_refill_current[TINY_NUM_CLASSES];
static uint8_t g_hotmag_refill_locked[TINY_NUM_CLASSES];
static uint8_t g_hotmag_class_en[TINY_NUM_CLASSES]; // 0=disabled for class, 1=enabled
static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
// Inline helpers
#include "box/tls_sll_box.h" // Box TLS-SLL: Safe SLL operations API (needed by hotmag)
#include "hakmem_tiny_hotmag.inc.h"
// Size-specialized tiny alloc (32B/64B) via function pointers (for A/B testing)
// TinyQuickSlot: 1 cache line per class (quick 6 items + small metadata)
// Opt-in via HAKMEM_TINY_QUICK=1
// NOTE: This type definition must come BEFORE the Phase 2D-1 includes below
typedef struct __attribute__((aligned(64))) {
void* items[6]; // 48B
uint8_t top; // 1B (0..6)
uint8_t _pad1; // 1B
uint16_t _pad2; // 2B
uint32_t _pad3; // 4B (padding to 64B)
} TinyQuickSlot;
static int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
static __thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below
// Phase 2D-1: Hot-path inline function extractions
// NOTE: These includes require TinyFastCache, TinyQuickSlot, and TinyTLSSlab to be fully defined
#include "hakmem_tiny_hot_pop.inc.h" // 4 functions: tiny_hot_pop_class{0..3}
#include "hakmem_tiny_fastcache.inc.h" // 5 functions: tiny_fast_pop/push, fastcache_pop/push, quick_pop
#include "hakmem_tiny_refill.inc.h" // 8 functions: refill operations
// Phase 7 Task 3: Pre-warm TLS cache at init
// Pre-allocate blocks to reduce first-allocation miss penalty
#if HAKMEM_TINY_PREWARM_TLS
void hak_tiny_prewarm_tls_cache(void) {
// Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks
// This reduces the first-allocation miss penalty by populating TLS cache
// Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class
// Trigger refill to populate TLS cache
// P0 Fix: Use appropriate refill function based on P0 status
#if HAKMEM_TINY_P0_BATCH_REFILL
sll_refill_batch_from_ss(class_idx, count);
#else
sll_refill_small_from_ss(class_idx, count);
#endif
}
}
#endif
// Ultra-Simple front (small per-class stack) — combines tiny front to minimize
// instructions and memory touches on alloc/free. Uses existing TLS bump shadow
// (g_tls_bcur/bend) when enabled to avoid per-alloc header writes.
// UltraFront capacity for 32/64B fast pop
#ifndef ULTRA_FRONT_CAP
#define ULTRA_FRONT_CAP 64
#endif
typedef struct __attribute__((aligned(64))) {
void* slots[ULTRA_FRONT_CAP];
uint16_t top; // 0..ULTRA_FRONT_CAP
uint16_t _pad;
} TinyUltraFront;
static int g_ultra_simple = 0; // HAKMEM_TINY_ULTRA_SIMPLE=1
static __thread TinyUltraFront g_tls_ultra[TINY_NUM_CLASSES];
// Inline helpers
#include "hakmem_tiny_ultra_front.inc.h"
// Ultra-Bump TLS shadow (bench/opt-in): keep a TLS-only bump window
// to avoid per-alloc header writes. Header is updated per-chunk reservation.
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
int g_bump_chunk = 32; // HAKMEM_TINY_BUMP_CHUNK (blocks)
__thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES] = {0};
__thread uint8_t* g_tls_bend[TINY_NUM_CLASSES] = {0};
// SLL small refill batch for specialized class (32/64B)
// Specialized order toggle: 1 = mag-first, 0 = sll-first
// HotMag helpers (for classes 0..3)
static inline int is_hot_class(int class_idx) { return class_idx >= 0 && class_idx <= 3; }
// Optional front (Ultra/HotMag) push helper: compile-out in release builds
static inline int tiny_optional_push(int class_idx, void* ptr) {
#if HAKMEM_BUILD_RELEASE
(void)class_idx;
(void)ptr;
return 0;
#else
if (__builtin_expect(g_ultra_simple && is_hot_class(class_idx), 0)) {
if (__builtin_expect(ultra_push(class_idx, ptr), 0)) {
return 1;
}
}
if (__builtin_expect(is_hot_class(class_idx), 0)) {
if (__builtin_expect(hotmag_push(class_idx, ptr), 0)) {
return 1;
}
}
return 0;
#endif
}
// Ultra-Simple helpers
// Phase 9.6: Deferred Intelligence (event queue + background)
// Extended event for FLINT Intelligence (lightweight; recorded off hot path only)
// Observability, ACE, and intelligence helpers
#include "hakmem_tiny_intel.inc"
// ============================================================================
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
// ============================================================================
// EXTRACTED: static int get_rss_kb_self(void) {
// EXTRACTED: FILE* f = fopen("/proc/self/status", "r");
// EXTRACTED: if (!f) return 0;
// EXTRACTED: char buf[256];
// EXTRACTED: int kb = 0;
// EXTRACTED: while (fgets(buf, sizeof(buf), f)) {
// EXTRACTED: if (strncmp(buf, "VmRSS:", 6) == 0) {
// EXTRACTED: char* p = buf;
// EXTRACTED: while (*p && (*p < '0' || *p > '9')) {
// EXTRACTED: p++;
// EXTRACTED: }
// EXTRACTED: kb = atoi(p);
// EXTRACTED: break;
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED: fclose(f);
// EXTRACTED: return kb;
// EXTRACTED: }
// Option: on a miss, allocate a single block and return it immediately instead of bulk-refilling the magazine
// Env: HAKMEM_TINY_REFILL_ONE_ON_MISS=1 to enable (default: 0)
int g_refill_one_on_miss = 0;
// Frontend fill target per class (adaptive)
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
// Forward declarations for helpers referenced by frontend_refill_fc
static inline int ultra_batch_for_class(int class_idx);
enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };
static inline uint16_t hak_thread_id16(void) {
// best-effort compress cached thread id to 16 bits
uint32_t tid = tiny_self_u32();
return (uint16_t)(tid ^ (tid >> 16));
}
static inline void eventq_push_ex(int class_idx, uint32_t size, uint8_t tier, uint8_t flags,
uint32_t site_id, uint16_t lat_bucket) {
(void)flags;
(void)lat_bucket;
(void)site_id;
if (!g_int_engine) return;
// Lightweight sampling: with mask m = 2^N - 1 set, log roughly 1 out of every 2^N events
unsigned m = g_int_sample_mask;
if (m != 0) {
unsigned x = g_tls_ev_seq++;
if ((x & m) != 0) return;
}
uint32_t t = atomic_fetch_add_explicit(&g_ev_tail, 1u, memory_order_relaxed);
AllocEvent ev;
ev.ts_ns = g_int_event_ts ? hak_now_ns() : 0;
ev.size = size;
ev.site_id = 0; // keep minimal
ev.latency_bucket = 0;
ev.tier_hit = tier;
ev.flags = 0;
ev.class_idx = (uint16_t)class_idx;
ev.thread_id = 0;
g_ev_ring[t & EVENTQ_MASK] = ev; // best-effort overwrite on overflow
}
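// Worked example (illustrative): with g_int_sample_mask = 0x3 (i.e. 2^2 - 1), only
// sequence numbers where (x & 0x3) == 0 pass the filter, so roughly 1 in 4 events
// is recorded; a mask of 0 records every event (g_int_engine must still be set).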
// Background refill workers and intelligence engine
#include "hakmem_tiny_background.inc"
// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)
// Ultra-fast try-only variant: attempt a direct SuperSlab bump (linear) allocation
// without any refill, freelist pop, or slow-path work. Returns NULL on miss.
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
if (!g_use_superslab) return NULL;
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
TinySlabMeta* meta = tls->meta;
if (!meta) return NULL;
// Try linear (bump) allocation first when freelist is empty
if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
// Use per-slab class_idx to get stride
size_t block_size = tiny_stride_for_class(meta->class_idx);
void* block = tls->slab_base + ((size_t)meta->used * block_size);
meta->used++;
// Track active blocks in SuperSlab for conservative reclamation
ss_active_inc(tls->ss);
return block;
}
// Do not pop freelist here (keep magazine/SLL handling consistent)
return NULL;
}
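// Worked example (illustrative): for a slab whose meta->class_idx maps to the 64B
// stride (class 3), an empty freelist and used = 5 (< capacity), the bump path above
// returns slab_base + 5*64 = slab_base + 320, advances used to 6, and bumps the
// SuperSlab active-block count; any other state simply returns NULL to the caller.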
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)
// Phase 2C-3: TLS operations module (included after helper function definitions)
#include "hakmem_tiny_tls_ops.h"
// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_refill_from_tls_slab() - 101 lines
// Hot path refill operation, moved to inline function in header
// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_list_spill_excess() - 97 lines
// Hot path spill operation, moved to inline function in header
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)
// SLL capacity policy: for hot tiny classes (0..3), allow a larger SLL of up to
// multiplier * mag_cap; for classes >= 4, keep a conservative half cap (to limit footprint).
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap) {
// Phase 12: g_sll_cap_override is deprecated; ignore it here and return the normal cap.
uint32_t cap = mag_cap;
if (class_idx <= 3) {
uint32_t mult = (g_sll_multiplier > 0 ? (uint32_t)g_sll_multiplier : 1u);
uint64_t want = (uint64_t)cap * (uint64_t)mult;
if (want > (uint64_t)TINY_TLS_MAG_CAP) cap = TINY_TLS_MAG_CAP; else cap = (uint32_t)want;
} else if (class_idx >= 4) {
cap = (mag_cap > 1u ? (mag_cap / 2u) : 1u);
}
return cap;
}
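// Worked example (illustrative): with mag_cap = 64 and g_sll_multiplier = 4, classes
// 0..3 get min(256, TINY_TLS_MAG_CAP), while classes 4..7 get 64 / 2 = 32.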
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)
// Ultra helpers forward declarations (defined later)
static inline int ultra_sll_cap_for_class(int class_idx);
static inline int ultra_validate_sll_head(int class_idx, void* head);
// Ultra-mode (SLL-only) helpers
// Ultra batch overrides via env: HAKMEM_TINY_ULTRA_BATCH_C{0..7}
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};
static inline int ultra_batch_for_class(int class_idx) {
int ov = g_ultra_batch_override[class_idx];
if (ov > 0) return ov;
switch (class_idx) {
case 0: return 64; // 8B
case 1: return 96; // 16B (best per A/B testing)
case 2: return 96; // 32B (best per A/B testing)
case 3: return 224; // 64B (best per A/B testing)
case 4: return 96; // 128B (promote front refill a bit)
case 5: return 64; // 256B (promote front refill)
case 6: return 64; // 512B (promote front refill)
default: return 32; // 1024B and others
}
}
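// Example (illustrative, assuming the env overrides named above are parsed into
// g_ultra_batch_override at init): exporting HAKMEM_TINY_ULTRA_BATCH_C3=256 raises
// the class-3 (64B) batch from the 224 default; unset or 0 keeps the table above.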
// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: ultra_refill_sll() - 56 lines (lines 1178-1233)
#include "hakmem_tiny_remote.inc"
// ============================================================================
// Internal Helpers
// ============================================================================
// Step 2: Slab Registry Operations
// Hash function for slab_base (64KB aligned)
// ============================================================================
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
// ============================================================================
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
// EXTRACTED: return (slab_base >> 16) & SLAB_REGISTRY_MASK;
// EXTRACTED: }
// Register slab in hash table (returns 1 on success, 0 on failure)
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
// EXTRACTED: pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED: int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED: // Linear probing (max 8 attempts)
// EXTRACTED: for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED: int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED: SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED: if (entry->slab_base == 0) {
// EXTRACTED: // Empty slot found
// EXTRACTED: entry->slab_base = slab_base;
// EXTRACTED: atomic_store_explicit(&entry->owner, owner, memory_order_release);
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: return 1;
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED:
// EXTRACTED: // Registry full (collision limit exceeded)
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: return 0;
// EXTRACTED: }
// Unregister slab from hash table
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
// EXTRACTED: pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED: int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED: // Linear probing search
// EXTRACTED: for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED: int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED: SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED: if (entry->slab_base == slab_base) {
// EXTRACTED: // Found - clear entry (atomic store prevents TOCTOU race)
// EXTRACTED: atomic_store_explicit(&entry->owner, NULL, memory_order_release);
// EXTRACTED: entry->slab_base = 0;
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: return;
// EXTRACTED: }
// EXTRACTED:
// EXTRACTED: if (entry->slab_base == 0) {
// EXTRACTED: // Empty slot - not found
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: return;
// EXTRACTED: }
// EXTRACTED: }
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: }
// Lookup slab by base address (O(1) average)
static TinySlab* registry_lookup(uintptr_t slab_base) {
// Lock-free read with atomic owner access (MT-safe)
int hash = registry_hash(slab_base);
// Linear probing search
for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
int idx = (hash + i) & SLAB_REGISTRY_MASK;
SlabRegistryEntry* entry = &g_slab_registry[idx];
if (entry->slab_base == slab_base) {
// Atomic load to prevent TOCTOU race with registry_unregister()
TinySlab* owner = atomic_load_explicit(&entry->owner, memory_order_acquire);
if (!owner) return NULL; // Entry cleared by unregister
return owner;
}
if (entry->slab_base == 0) {
return NULL; // Empty slot - not found
}
}
return NULL; // Not found after max probes
}
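// Usage sketch (illustrative): callers mask a block pointer down to its 64KB slab
// base before the lookup, as hak_tiny_owner_slab() does below:
//   uintptr_t base = (uintptr_t)ptr & ~(TINY_SLAB_SIZE - 1);
//   TinySlab* owner = registry_lookup(base);  // NULL => not a Tiny-managed slab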
// ============================================================================
// EXTRACTED TO hakmem_tiny_slab_mgmt.inc (Phase 2D-4 FINAL)
// ============================================================================
// Function: allocate_new_slab() - 79 lines (lines 952-1030)
// Allocate new slab for a class
// Function: release_slab() - 23 lines (lines 1033-1055)
// Release a slab back to system
// Step 2: Find slab owner by pointer (O(1) via hash table registry, or O(N) fallback)
TinySlab* hak_tiny_owner_slab(void* ptr) {
if (!ptr || !g_tiny_initialized) return NULL;
// Phase 6.14: Runtime toggle between Registry (O(1)) and List (O(N))
if (g_use_registry) {
// O(1) lookup via hash table
uintptr_t slab_base = (uintptr_t)ptr & ~(TINY_SLAB_SIZE - 1);
TinySlab* slab = registry_lookup(slab_base);
if (!slab) return NULL;
// SAFETY: validate membership (ptr must be inside [base, base+64KB))
uintptr_t start = (uintptr_t)slab->base;
uintptr_t end = start + TINY_SLAB_SIZE;
if ((uintptr_t)ptr < start || (uintptr_t)ptr >= end) {
return NULL; // false positive from registry → treat as non-Tiny
}
return slab;
} else {
// O(N) fallback: linear search through all slab lists (lock per class)
for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
pthread_mutex_lock(lock);
// Search free slabs
for (TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; slab; slab = slab->next) {
uintptr_t slab_start = (uintptr_t)slab->base;
uintptr_t slab_end = slab_start + TINY_SLAB_SIZE;
if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) {
pthread_mutex_unlock(lock);
return slab;
}
}
// Search full slabs
for (TinySlab* slab = g_tiny_pool.full_slabs[class_idx]; slab; slab = slab->next) {
uintptr_t slab_start = (uintptr_t)slab->base;
uintptr_t slab_end = slab_start + TINY_SLAB_SIZE;
if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) {
pthread_mutex_unlock(lock);
return slab;
}
}
pthread_mutex_unlock(lock);
}
return NULL; // Not found
}
}
// Function: move_to_full_list() - 20 lines (lines 1104-1123)
// Move slab to full list
// Function: move_to_free_list() - 20 lines (lines 1126-1145)
// Move slab to free list
// ============================================================================
// Public API
// ============================================================================
// ============================================================================
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
// ============================================================================
#include "hakmem_tiny_init.inc"
// ============================================================================
// 3-Layer Architecture (2025-11-01 Simplification)
// ============================================================================
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
#include "hakmem_tiny_bump.inc.h"
// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
#include "hakmem_tiny_smallmag.inc.h"
// ============================================================================
// Phase 6 Fast Path Options (mutually exclusive)
// ============================================================================
// Choose ONE of the following Phase 6 optimizations:
//
// Phase 6-1.5: Alignment Guessing (LEGACY - committed 2025-11-02)
// - Enable: -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
// - Speed: 235 M ops/sec
// - Memory: 0% overhead
// - Method: Guess size class from pointer alignment (__builtin_ctzl)
// - Risk: Alignment assumptions may break with future changes
//
// Phase 6-1.6: Metadata Header (NEW - recommended for production)
// - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
// - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
// - Memory: ~6-12% overhead (8 bytes/allocation)
// - Method: Store pool_type + size_class in 8-byte header
// - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
// - Eliminates: Registry lookups, mid_lookup, owner checks
// ============================================================================
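// Example (illustrative; the exact invocation depends on the project's build system):
//   CFLAGS += -DHAKMEM_TINY_PHASE6_METADATA=1      # metadata-header fast path
//   CFLAGS += -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1  # or: legacy alignment-guess path
// At most one of the two may be defined; the #error checks below reject builds that
// enable both (or combine either with PHASE6_BOX_REFACTOR).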
// Forward declarations for Phase 6 alloc/free functions
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
void* hak_tiny_alloc_ultra_simple(size_t size);
void hak_tiny_free_ultra_simple(void* ptr);
#endif
#if defined(HAKMEM_TINY_PHASE6_METADATA) && defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
#error "Cannot enable both PHASE6_METADATA and PHASE6_ULTRA_SIMPLE"
#endif
// Phase 6-1.7: Box Theory Refactoring - Mutual exclusion check
#if HAKMEM_TINY_PHASE6_BOX_REFACTOR
#if defined(HAKMEM_TINY_PHASE6_METADATA) || defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
#error "Cannot enable PHASE6_BOX_REFACTOR with other Phase 6 options"
#endif
// Box 1: Atomic Operations (Layer 0 - Foundation)
#include "tiny_atomic.h"
// Box 5: Allocation Fast Path (Layer 1 - 3-4 instructions)
#include "tiny_alloc_fast.inc.h"
// Box 6: Free Fast Path (Layer 2 - 2-3 instructions)
#include "tiny_free_fast.inc.h"
// ---------------- Refill count (Front) global config ----------------
// Parsed once at init; hot path reads plain ints (no getenv).
int g_refill_count_global = 0; // HAKMEM_TINY_REFILL_COUNT
int g_refill_count_hot = 0; // HAKMEM_TINY_REFILL_COUNT_HOT
int g_refill_count_mid = 0; // HAKMEM_TINY_REFILL_COUNT_MID
int g_refill_count_class[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_REFILL_COUNT_C{0..7}
// Export wrapper functions for hakmem.c to call
// Phase 6-1.7 Optimization: Remove diagnostic overhead, rely on LTO for inlining
void* hak_tiny_alloc_fast_wrapper(size_t size) {
static _Atomic uint64_t wrapper_call_count = 0;
uint64_t call_num = atomic_fetch_add(&wrapper_call_count, 1);
// Pointer tracking init (first call only)
PTR_TRACK_INIT();
// PRIORITY 3: Periodic canary validation (every 1000 ops)
periodic_canary_check(call_num, "hak_tiny_alloc_fast_wrapper");
// Box I: Periodic full integrity check (every 5000 ops)
#if HAKMEM_INTEGRITY_LEVEL >= 3
if ((call_num % 5000) == 0) {
extern void integrity_periodic_full_check(const char*);
integrity_periodic_full_check("periodic check in alloc wrapper");
}
#endif
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_TINY_ALLOC_FAST_WRAPPER] call=%lu size=%zu\n", call_num, size);
fflush(stderr);
}
#endif
// Diagnostic removed - use HAKMEM_TINY_FRONT_DIAG in tiny_alloc_fast_pop if needed
void* result = tiny_alloc_fast(size);
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14250 && call_num < 14280 && size <= 1024) {
fprintf(stderr, "[HAK_TINY_ALLOC_FAST_WRAPPER] call=%lu returned %p\n", call_num, result);
fflush(stderr);
}
#endif
return result;
}
void hak_tiny_free_fast_wrapper(void* ptr) {
static _Atomic uint64_t free_call_count = 0;
uint64_t call_num = atomic_fetch_add(&free_call_count, 1);
(void)call_num; // only consumed by the debug window below in non-release builds
// Debug window guarded like the alloc wrapper above (no stderr traffic in release builds)
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14135 && call_num < 14145) {
fprintf(stderr, "[HAK_TINY_FREE_FAST_WRAPPER] call=%lu ptr=%p\n", call_num, ptr);
fflush(stderr);
}
#endif
tiny_free_fast(ptr);
#if !HAKMEM_BUILD_RELEASE
if (call_num > 14135 && call_num < 14145) {
fprintf(stderr, "[HAK_TINY_FREE_FAST_WRAPPER] call=%lu completed\n", call_num);
fflush(stderr);
}
#endif
}
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
// Phase 6-1.5: Alignment guessing (legacy)
// Refill count globals (needed for compatibility)
int g_refill_count_global = 0;
int g_refill_count_hot = 0;
int g_refill_count_mid = 0;
int g_refill_count_class[TINY_NUM_CLASSES] = {0};
#include "hakmem_tiny_ultra_simple.inc"
// Wrapper functions for hakmem.c compatibility (not used in ULTRA_SIMPLE but needed for linking)
void* hak_tiny_alloc_fast_wrapper(size_t size) {
return hak_tiny_alloc_ultra_simple(size);
}
void hak_tiny_free_fast_wrapper(void* ptr) {
hak_tiny_free_ultra_simple(ptr);
}
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
// Phase 6-1.6: Metadata header (recommended)
#include "hakmem_tiny_metadata.inc"
#endif
// Layer 1-3: Main allocation function (simplified)
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#define HAKMEM_TINY_USE_NEW_3LAYER 0 // default OFF (legacy path)
#endif
#if HAKMEM_TINY_USE_NEW_3LAYER
#include "hakmem_tiny_alloc_new.inc"
#else
// Old 6-7 layer architecture (backup)
#include "hakmem_tiny_alloc.inc"
#endif
#include "hakmem_tiny_slow.inc"
// Free path implementations
#include "hakmem_tiny_free.inc"
// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
// Public trim and cleanup operation for lifecycle management
// Forward decl for internal registry lookup used by ultra safety validation
static TinySlab* registry_lookup(uintptr_t slab_base);
// Ultra helpers: per-class SLL cap and pointer validation
static inline int ultra_sll_cap_for_class(int class_idx) {
int ov = g_ultra_sll_cap_override[class_idx];
if (ov > 0) return ov;
switch (class_idx) {
case 0: return 256; // 8B
case 1: return 384; // 16B (best per A/B testing)
case 2: return 384; // 32B (best per A/B testing)
case 3: return 768; // 64B (best per A/B testing)
case 4: return 256; // 128B
default: return 128; // others
}
}
static inline int ultra_validate_sll_head(int class_idx, void* head) {
uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
TinySlab* owner = registry_lookup(base);
if (!owner) return 0;
uintptr_t start = (uintptr_t)owner->base;
if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
return (owner->class_idx == class_idx);
}
// Optional: wrapper TLS guard (detects wrapper re-entry via a TLS counter)
#ifndef HAKMEM_WRAPPER_TLS_GUARD
#define HAKMEM_WRAPPER_TLS_GUARD 0
#endif
#if HAKMEM_WRAPPER_TLS_GUARD
extern __thread int g_tls_in_wrapper;
#endif
// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
// Static function for draining TLS caches
//
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
// Static function for memory diet mode application
//
// Phase 2D-3: Lifecycle management functions (226 lines total)
#include "hakmem_tiny_lifecycle.inc"
// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
#include "hakmem_tiny_slab_mgmt.inc"
// ============================================================================
// ACE Learning Layer: Runtime parameter setters
// ============================================================================
void hkm_ace_set_drain_threshold(int class_idx, uint32_t threshold) {
// Validate inputs
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
return;
}
if (threshold < 16 || threshold > 2048) {
return;
}
// Set per-class threshold (used by remote free drain logic)
g_remote_drain_thresh_per_class[class_idx] = (int)threshold;
}
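// Example (illustrative): hkm_ace_set_drain_threshold(3, 256) asks the remote-free
// drain logic to defer draining class 3 (64B) until roughly 256 blocks are queued;
// out-of-range class indices or thresholds outside [16, 2048] are ignored.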
#include "tiny_fc_api.h"
int tiny_fc_room(int class_idx) {
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return 0;
TinyFastCache* fc = &g_fast_cache[class_idx];
// Effective per-class cap comes from g_fast_cap (env-tunable),
// clamped by the static storage capacity TINY_FASTCACHE_CAP.
uint16_t eff_cap = g_fast_cap[class_idx];
if (eff_cap > TINY_FASTCACHE_CAP) eff_cap = TINY_FASTCACHE_CAP;
int room = (int)eff_cap - fc->top;
return room > 0 ? room : 0;
}
int tiny_fc_push_bulk(int class_idx, void** arr, int n) {
if (!arr || n <= 0) return 0;
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES) return 0;
TinyFastCache* fc = &g_fast_cache[class_idx];
uint16_t eff_cap = g_fast_cap[class_idx];
if (eff_cap > TINY_FASTCACHE_CAP) eff_cap = TINY_FASTCACHE_CAP;
int room = (int)eff_cap - fc->top;
if (room <= 0) return 0;
int take = n < room ? n : room;
// Forward fill with light unrolling to reduce branch overhead
int i = 0;
for (; i + 3 < take; i += 4) {
fc->items[fc->top++] = arr[i];
fc->items[fc->top++] = arr[i + 1];
fc->items[fc->top++] = arr[i + 2];
fc->items[fc->top++] = arr[i + 3];
}
for (; i < take; i++) {
fc->items[fc->top++] = arr[i];
}
return take;
}
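// Usage sketch (illustrative): a refill/spill path can just push and keep whatever
// the FastCache does not accept:
//   int pushed = tiny_fc_push_bulk(cls, batch, n);  // clamps to tiny_fc_room(cls)
//   // the remaining (n - pushed) blocks stay on the caller's own list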
// Minimal class5 TLS stats dump (release-safe, one-shot)
// Env: HAKMEM_TINY_CLASS5_STATS_DUMP=1 to enable
static void tiny_class5_stats_dump(void) __attribute__((destructor));
static void tiny_class5_stats_dump(void) {
const char* e = getenv("HAKMEM_TINY_CLASS5_STATS_DUMP");
if (!(e && *e && e[0] != '0')) return;
TinyTLSList* tls5 = &g_tls_lists[5];
fprintf(stderr, "\n=== Class5 TLS (release-min) ===\n");
fprintf(stderr, "hotpath=%d cap=%u refill_low=%u spill_high=%u count=%u\n",
g_tiny_hotpath_class5, tls5->cap, tls5->refill_low, tls5->spill_high, tls5->count);
fprintf(stderr, "===============================\n");
}
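// Example (illustrative): running any workload with HAKMEM_TINY_CLASS5_STATS_DUMP=1
// prints the one-shot class-5 (256B) TLS summary above to stderr at process exit.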
// ========= Tiny Guard (targeted debug; low overhead when disabled) =========
static int g_tiny_guard_enabled = -1;
static int g_tiny_guard_class = 2;
static int g_tiny_guard_limit = 8;
static __thread int g_tiny_guard_seen = 0;
static inline int tiny_guard_enabled_runtime(void) {
if (__builtin_expect(g_tiny_guard_enabled == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_GUARD");
g_tiny_guard_enabled = (e && *e && *e != '0') ? 1 : 0;
const char* ec = getenv("HAKMEM_TINY_GUARD_CLASS");
if (ec && *ec) g_tiny_guard_class = atoi(ec);
const char* el = getenv("HAKMEM_TINY_GUARD_MAX");
if (el && *el) g_tiny_guard_limit = atoi(el);
if (g_tiny_guard_limit <= 0) g_tiny_guard_limit = 8;
}
return g_tiny_guard_enabled;
}
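// Example (illustrative): HAKMEM_TINY_GUARD=1 HAKMEM_TINY_GUARD_CLASS=2
// HAKMEM_TINY_GUARD_MAX=16 dumps the first 16 class-2 (32B) guard events per thread;
// with the env unset the guard resolves to disabled after the first check.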
int tiny_guard_is_enabled(void) { return tiny_guard_enabled_runtime(); }
static void tiny_guard_dump_bytes(const char* tag, const uint8_t* p, size_t n) {
fprintf(stderr, "[TGUARD] %s:", tag);
for (size_t i = 0; i < n; i++) fprintf(stderr, " %02x", p[i]);
fprintf(stderr, "\n");
}
void tiny_guard_on_alloc(int cls, void* base, void* user, size_t stride) {
if (!tiny_guard_enabled_runtime() || cls != g_tiny_guard_class) return;
if (g_tiny_guard_seen++ >= g_tiny_guard_limit) return;
uint8_t* b = (uint8_t*)base;
fprintf(stderr, "[TGUARD] alloc cls=%d base=%p user=%p stride=%zu hdr=%02x\n",
cls, base, user, stride, b[0]);
// Visualize adjacent headers (this block's base and the next block's header)
tiny_guard_dump_bytes("around_base", b, (stride >= 8 ? 8 : stride));
tiny_guard_dump_bytes("next_header", b + stride, 4);
}
void tiny_guard_on_invalid(void* user_ptr, uint8_t hdr) {
if (!tiny_guard_enabled_runtime()) return;
if (g_tiny_guard_seen++ >= g_tiny_guard_limit) return;
uint8_t* u = (uint8_t*)user_ptr;
fprintf(stderr, "[TGUARD] invalid header at user=%p hdr=%02x prev=%02x next=%02x\n",
user_ptr, hdr, *(u - 2), *(u));
tiny_guard_dump_bytes("dump_before", u - 8, 8);
tiny_guard_dump_bytes("dump_after", u, 8);
}