- Feature: Added HAKMEM_TINY_HEADERLESS toggle (A/B testing) - Feature: Implemented Headerless layout logic (Offset=0) - Refactor: Centralized layout definitions in tiny_layout_box.h - Refactor: Abstracted pointer arithmetic in free path via ptr_conversion_box.h - Verification: sh8bench passes in Headerless mode (No TLS_SLL_HDR_RESET) - Known Issue: Regression in Phase 1 mode due to blind pointer conversion logic
211 lines
8.6 KiB
C++
211 lines
8.6 KiB
C++
// hakmem_tiny_config_box.inc
// Box: Tiny allocator configuration, debug counters, and return helpers.
// Extracted from hakmem_tiny.c to reduce file size and isolate config logic.

// ============================================================================
// Global Operation Counter (for debug logging)
// ============================================================================
#include <stdatomic.h>

// Monotonic operation counter. The debug build of HAK_RET_ALLOC bumps it with
// atomic_fetch_add() and uses the previous value to number (and rate-limit)
// its "[OP#...]" log lines.
_Atomic uint64_t g_debug_op_count = 0;

// ============================================================================
|
||
// Size class table (Box 3 dependency)
|
||
// ============================================================================
|
||
// Phase E1-CORRECT: ALL classes have 1-byte header
|
||
// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B]
|
||
// Usable data = stride - 1 (implicit)
|
||
const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
|
||
8, // Class 0: 8B total = [Header 1B][Data 7B]
|
||
16, // Class 1: 16B total = [Header 1B][Data 15B]
|
||
32, // Class 2: 32B total = [Header 1B][Data 31B]
|
||
64, // Class 3: 64B total = [Header 1B][Data 63B]
|
||
128, // Class 4: 128B total = [Header 1B][Data 127B]
|
||
256, // Class 5: 256B total = [Header 1B][Data 255B]
|
||
512, // Class 6: 512B total = [Header 1B][Data 511B]
|
||
2048 // Class 7: 2048B total = [Header 1B][Data 2047B] (upgraded for 1024B requests)
|
||
};
|
||
|
||
// ============================================================================
// Phase 16: Dynamic Tiny Max Size (ENV: HAKMEM_TINY_MAX_CLASS)
// Phase 17-1: Auto-adjust when Small-Mid enabled
// ============================================================================

// Forward declaration for the Small-Mid check. The function is defined outside
// this file; only its prototype is needed here.
extern bool smallmid_is_enabled(void);

// Optimized: Cached max size for hot path
// Moved to hakmem_tiny.h for global inlining

// ============================================================================
// PRIORITY 1-4: Integrity Check Counters
// ============================================================================
// One atomic counter per integrity-check category. All start at zero; the code
// that increments them is outside this file.
_Atomic uint64_t g_integrity_check_class_bounds = 0;
_Atomic uint64_t g_integrity_check_freelist = 0;
_Atomic uint64_t g_integrity_check_canary = 0;
_Atomic uint64_t g_integrity_check_header = 0;

// Build-time gate for debug counters (path/ultra). Default OFF.
// Define HAKMEM_DEBUG_COUNTERS to a non-zero value before this point (e.g. via
// the compiler command line) to compile the HAK_*DBG_INC macros in.
#ifndef HAKMEM_DEBUG_COUNTERS
#define HAKMEM_DEBUG_COUNTERS 0
#endif

// Runtime debug switches. All default to 0 (off); nothing in this file
// changes them.
int g_debug_fast0 = 0;
int g_debug_remote_guard = 0;
int g_remote_force_notify = 0;

// Tiny free safety (debug)
int g_tiny_safe_free = 0;         // Default OFF for performance; env: HAKMEM_SAFE_FREE=1 turns it ON
int g_tiny_safe_free_strict = 0;  // env: HAKMEM_SAFE_FREE_STRICT=1
int g_tiny_force_remote = 0;      // env: HAKMEM_TINY_FORCE_REMOTE=1

// Build-time gate: Minimal Tiny front (bench-only); see HAKMEM_TINY_MINIMAL_FRONT below.

// Reports whether SuperSlab tracing was requested through the environment.
//
// HAKMEM_TINY_SUPERSLAB_TRACE is read once, on the first call. The result is
// cached in a function-local static and returned by every later call, so
// changing the variable afterwards has no effect.
//
// Returns 1 when the variable is set and parses to a non-zero integer,
// 0 otherwise (unset, empty, non-numeric, or zero).
//
// NOTE(review): the cache is a plain int with no synchronization. Concurrent
// first calls would all compute the same value, but confirm that this is
// acceptable for the callers.
static inline int superslab_trace_enabled(void) {
    static int g_ss_trace_flag = -1;  // -1 = environment not consulted yet
    if (__builtin_expect(g_ss_trace_flag == -1, 0)) {
        const char* tr = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
        // strtol rather than atoi: atoi has undefined behavior when the value
        // does not fit in an int, while strtol saturates, which still reads
        // as "non-zero" here.
        g_ss_trace_flag = (tr != NULL && strtol(tr, NULL, 10) != 0) ? 1 : 0;
    }
    return g_ss_trace_flag;
}
// HAKMEM_TINY_MINIMAL_FRONT
// When enabled, physically excludes optional front tiers from the hot path
// (UltraFront/Quick/Frontend/HotMag/SS-try/BumpShadow), leaving:
// SLL → TLS Magazine → SuperSlab → (remaining slow path)
// Defaults to 1 (enabled) unless the build defines it beforehand.
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#define HAKMEM_TINY_MINIMAL_FRONT 1
#endif

// HAKMEM_TINY_STRICT_FRONT
// Strict front: compile-out optional front tiers but keep baseline structure intact
// Defaults to 0 (disabled) unless the build defines it beforehand.
#ifndef HAKMEM_TINY_STRICT_FRONT
#define HAKMEM_TINY_STRICT_FRONT 0
#endif

// Bench-only fast path knobs (defaults)
// HAKMEM_TINY_BENCH_REFILL is the global refill default. Each per-class knob
// below falls back to it unless the build overrides that knob individually.
#ifndef HAKMEM_TINY_BENCH_REFILL
#define HAKMEM_TINY_BENCH_REFILL 8
#endif

// Optional per-class overrides (bench-only)
#ifndef HAKMEM_TINY_BENCH_REFILL8
#define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif

// Bench-only warmup amounts (pre-fill TLS SLL on first alloc per class)
// Each knob can be overridden by defining it before this point.
#ifndef HAKMEM_TINY_BENCH_WARMUP8
#define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#define HAKMEM_TINY_BENCH_WARMUP64 192
#endif

#ifdef HAKMEM_TINY_BENCH_FASTPATH
// Per-thread flags, only present in bench-fastpath builds.
// NOTE(review): presumably one "warmup done" flag for each of the four
// warmed classes (8/16/32/64B) - confirm against the code that sets them.
static __thread unsigned char g_tls_bench_warm_done[4];
#endif

// Debug counter increment macros, gated at build time by HAKMEM_DEBUG_COUNTERS.
//
//   HAK_PATHDBG_INC(arr, idx)  - increments arr[idx], but only while the
//                                runtime flag g_path_debug_enabled is non-zero.
//   HAK_ULTRADBG_INC(arr, idx) - increments arr[idx] unconditionally.
//   HAK_DBG_INC(var)           - simple scalar debug increment.
//
// When HAKMEM_DEBUG_COUNTERS is 0 all three expand to no-ops. The array
// variants still evaluate `idx` (cast to void) but never touch `arr`, and
// HAK_DBG_INC does not evaluate `var` at all.
#if HAKMEM_DEBUG_COUNTERS
#define HAK_PATHDBG_INC(arr, idx) do { if (g_path_debug_enabled) { (arr)[(idx)]++; } } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (arr)[(idx)]++; } while(0)
#define HAK_DBG_INC(var) do { (var)++; } while(0)
#else
#define HAK_PATHDBG_INC(arr, idx) do { (void)(idx); } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (void)(idx); } while(0)
#define HAK_DBG_INC(var) do { (void)0; } while(0)
#endif
// Return helper: record tiny alloc stat (guarded) then return pointer
// Forward declaration only; the legacy HAK_RET_ALLOC variants below call it.
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);

// ========== HAK_RET_ALLOC: Single Definition Point ==========
// HAK_RET_ALLOC(cls, ptr) ends the calling function: every variant contains a
// `return` statement, so it may only be used inside a function that returns a
// pointer.
//
// Choose implementation based on HAKMEM_TINY_HEADERLESS or HAKMEM_TINY_HEADER_CLASSIDX
// - Headerless enabled: Return base directly (no header write)
// - Phase 7 enabled: Write header and return user pointer (base+1)
// - Phase 7 disabled: Legacy behavior (stats + route + return)
//
// Macro-local variables use a hak_ret_ prefix and trailing underscore so they
// cannot collide with the caller's argument expressions (for example a caller
// variable that is itself called base_ptr).

#if HAKMEM_TINY_HEADERLESS
// Headerless: No header write, user = base
// Ultra-fast inline macro (1-2 instructions)
// Note: `cls` is not evaluated in this variant.
#define HAK_RET_ALLOC(cls, base_ptr) do { \
    /* No header write needed for Headerless mode */ \
    return (base_ptr); \
} while(0)

#elif HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions)
#define HAK_RET_ALLOC(cls, base_ptr) do { \
    tiny_header_write_for_alloc((base_ptr), (cls)); \
    hak_base_ptr_t hak_ret_base_ = HAK_BASE_FROM_RAW(base_ptr); \
    hak_user_ptr_t hak_ret_user_ = ptr_base_to_user(hak_ret_base_, (cls)); \
    return (void*)HAK_USER_TO_RAW(hak_ret_user_); \
} while(0)
#else
// Debug: Keep full validation via tiny_region_id_write_header() + operation logging
// The first 50 operations (by g_debug_op_count) are logged to stderr when
// the class index is 1. Each argument is evaluated exactly once.
#define HAK_RET_ALLOC(cls, ptr) do { \
    extern _Atomic uint64_t g_debug_op_count; \
    extern __thread TinyTLSSLL g_tls_sll[]; \
    void* hak_ret_base_ = (ptr); \
    const int hak_ret_cls_ = (cls); \
    void* hak_ret_user_ = tiny_region_id_write_header(hak_ret_base_, hak_ret_cls_); \
    uint64_t hak_ret_op_ = atomic_fetch_add(&g_debug_op_count, 1); \
    if (hak_ret_op_ < 50 && hak_ret_cls_ == 1) { \
        fprintf(stderr, "[OP#%04lu ALLOC] cls=%d ptr=%p base=%p from=alloc tls_count=%u\n", \
                (unsigned long)hak_ret_op_, hak_ret_cls_, hak_ret_user_, hak_ret_base_, \
                g_tls_sll[hak_ret_cls_].count); \
        fflush(stderr); \
    } \
    return hak_ret_user_; \
} while(0)
#endif
#else
// Legacy: Stats and routing before return
#ifdef HAKMEM_ENABLE_STATS
// Optional: sampling (enabled at build time). The hot path uses a direct
// inline call (no indirect branch).
#ifdef HAKMEM_TINY_STAT_SAMPLING
static __thread unsigned g_tls_stat_accum_alloc[TINY_NUM_CLASSES];
static int g_stat_rate_lg = 0;  // 0 = record every call; otherwise once every 2^lg calls
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) {
    if (__builtin_expect(g_stat_rate_lg == 0, 1)) { stats_record_alloc(cls); return; }
    unsigned m = (1u << g_stat_rate_lg) - 1u;
    if (((++g_tls_stat_accum_alloc[cls]) & m) == 0u) stats_record_alloc(cls);
}
#else
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) { stats_record_alloc(cls); }
#endif
#define HAK_RET_ALLOC(cls, ptr) do { \
    tiny_debug_track_alloc_ret((cls), (ptr)); \
    hkm_stat_alloc((cls)); \
    ROUTE_COMMIT((cls), 0x7F); \
    return (ptr); \
} while(0)
#else
#define HAK_RET_ALLOC(cls, ptr) do { \
    tiny_debug_track_alloc_ret((cls), (ptr)); \
    ROUTE_COMMIT((cls), 0x7F); \
    return (ptr); \
} while(0)
#endif
#endif // HAKMEM_TINY_HEADERLESS / HAKMEM_TINY_HEADER_CLASSIDX

// Free-side stats: compile-time zero when stats disabled
// With HAKMEM_ENABLE_STATS defined, HAK_STAT_FREE(cls) forwards to
// stats_record_free(cls). Otherwise it expands to an empty statement and
// `cls` is not evaluated.
#ifdef HAKMEM_ENABLE_STATS
#define HAK_STAT_FREE(cls) do { stats_record_free((cls)); } while(0)
#else
#define HAK_STAT_FREE(cls) do { } while(0)
#endif