/*
 * Change note (from commit): moved tiny_get_max_size to the header for
 * inlining, and used a cached static variable to avoid repeated env lookups.
 * Larson benchmark: 51.99M ops/s (stable).
 */
// hakmem_tiny_config_box.inc
|
||
// Box: Tiny allocator configuration, debug counters, and return helpers.
|
||
// Extracted from hakmem_tiny.c to reduce file size and isolate config logic.
|
||
|
||
// ============================================================================
|
||
// Size class table (Box 3 dependency)
|
||
// ============================================================================
|
||
// Phase E1-CORRECT: ALL classes have 1-byte header
|
||
// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B]
|
||
// Usable data = stride - 1 (implicit)
|
||
const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
|
||
8, // Class 0: 8B total = [Header 1B][Data 7B]
|
||
16, // Class 1: 16B total = [Header 1B][Data 15B]
|
||
32, // Class 2: 32B total = [Header 1B][Data 31B]
|
||
64, // Class 3: 64B total = [Header 1B][Data 63B]
|
||
128, // Class 4: 128B total = [Header 1B][Data 127B]
|
||
256, // Class 5: 256B total = [Header 1B][Data 255B]
|
||
512, // Class 6: 512B total = [Header 1B][Data 511B]
|
||
2048 // Class 7: 2048B total = [Header 1B][Data 2047B] (upgraded for 1024B requests)
|
||
};
|
||
|
||
// ============================================================================
// Phase 16: Dynamic Tiny Max Size (ENV: HAKMEM_TINY_MAX_CLASS)
// Phase 17-1: Auto-adjust when Small-Mid enabled
// ============================================================================

// Forward declaration for Small-Mid check (defined in the Small-Mid box).
extern bool smallmid_is_enabled(void);

// Optimized: cached max size for hot path.
// Moved to hakmem_tiny.h for global inlining.

// ============================================================================
// PRIORITY 1-4: Integrity Check Counters
// ============================================================================
// Runtime tallies of integrity violations detected; diagnostics only.
_Atomic uint64_t g_integrity_check_class_bounds = 0;
_Atomic uint64_t g_integrity_check_freelist = 0;
_Atomic uint64_t g_integrity_check_canary = 0;
_Atomic uint64_t g_integrity_check_header = 0;

// Build-time gate for debug counters (path/ultra). Default OFF.
#ifndef HAKMEM_DEBUG_COUNTERS
#define HAKMEM_DEBUG_COUNTERS 0
#endif

// Runtime debug toggles (all default OFF).
int g_debug_fast0 = 0;
int g_debug_remote_guard = 0;
int g_remote_force_notify = 0;
// Tiny free safety (debug)
int g_tiny_safe_free = 0;        // Default OFF for performance; env: HAKMEM_SAFE_FREE=1 turns it ON
int g_tiny_safe_free_strict = 0; // env: HAKMEM_SAFE_FREE_STRICT=1
int g_tiny_force_remote = 0;     // env: HAKMEM_TINY_FORCE_REMOTE=1
// Lazily evaluated environment gate for SuperSlab tracing.
// Reads HAKMEM_TINY_SUPERSLAB_TRACE once on first call and caches the result
// in a function-local static, so the hot path pays only a predicted branch.
// Returns 1 when tracing is enabled, 0 otherwise.
static inline int superslab_trace_enabled(void) {
    static int g_ss_trace_flag = -1; // -1 = environment not yet consulted
    if (__builtin_expect(g_ss_trace_flag == -1, 0)) {
        const char* tr = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
        g_ss_trace_flag = (tr && atoi(tr) != 0) ? 1 : 0;
    }
    return g_ss_trace_flag;
}

// Build-time gate: Minimal Tiny front (bench-only)
// When enabled, physically excludes optional front tiers from the hot path
// (UltraFront/Quick/Frontend/HotMag/SS-try/BumpShadow), leaving:
//   SLL → TLS Magazine → SuperSlab → (remaining slow path)
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#define HAKMEM_TINY_MINIMAL_FRONT 1
#endif
// Strict front: compile-out optional front tiers but keep baseline structure intact
#ifndef HAKMEM_TINY_STRICT_FRONT
#define HAKMEM_TINY_STRICT_FRONT 0
#endif

// Bench-only fast path knobs (defaults)
#ifndef HAKMEM_TINY_BENCH_REFILL
#define HAKMEM_TINY_BENCH_REFILL 8
#endif
// Optional per-class overrides (bench-only); default to the shared knob above.
#ifndef HAKMEM_TINY_BENCH_REFILL8
#define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif

// Bench-only warmup amounts (pre-fill TLS SLL on first alloc per class)
#ifndef HAKMEM_TINY_BENCH_WARMUP8
#define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#define HAKMEM_TINY_BENCH_WARMUP64 192
#endif

#ifdef HAKMEM_TINY_BENCH_FASTPATH
// Per-thread "warmup already done" flags, one per warmed class.
static __thread unsigned char g_tls_bench_warm_done[4];
#endif
#if HAKMEM_DEBUG_COUNTERS
// Debug-counter builds: bump arr[idx]; path counters additionally honor the
// runtime g_path_debug_enabled toggle.
#define HAK_PATHDBG_INC(arr, idx) do { if (g_path_debug_enabled) { (arr)[(idx)]++; } } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (arr)[(idx)]++; } while(0)
#else
// Counters compiled out: evaluate idx once for side effects, touch nothing.
#define HAK_PATHDBG_INC(arr, idx) do { (void)(idx); } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (void)(idx); } while(0)
#endif
// Simple scalar debug increment (no-op when HAKMEM_DEBUG_COUNTERS=0)
#if HAKMEM_DEBUG_COUNTERS
#define HAK_DBG_INC(var) do { (var)++; } while(0)
#else
#define HAK_DBG_INC(var) do { (void)0; } while(0)
#endif
// Return helper: record tiny alloc stat (guarded) then return pointer.
// Defined elsewhere in hakmem_tiny; forward-declared for the macros below.
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);

// ========== HAK_RET_ALLOC: Single Definition Point ==========
// Choose implementation based on HAKMEM_TINY_HEADER_CLASSIDX
// - Phase 7 enabled: Write header and return user pointer
// - Phase 7 disabled: Legacy behavior (stats + route + return)

#if HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions): stamp the header byte at the
// block base, then hand the caller the address just past it.
#define HAK_RET_ALLOC(cls, base_ptr) do { \
    *(uint8_t*)(base_ptr) = HEADER_MAGIC | ((cls) & HEADER_CLASS_MASK); \
    return (void*)((uint8_t*)(base_ptr) + 1); \
} while(0)
#else
// Debug: Keep full validation via tiny_region_id_write_header()
#define HAK_RET_ALLOC(cls, ptr) return tiny_region_id_write_header((ptr), (cls))
#endif
#else
// Legacy: Stats and routing before return
#ifdef HAKMEM_ENABLE_STATS
// Optional: sampling (enabled at build time). The hot path makes a direct
// inline call — no indirect branch.
#ifdef HAKMEM_TINY_STAT_SAMPLING
static __thread unsigned g_tls_stat_accum_alloc[TINY_NUM_CLASSES];
static int g_stat_rate_lg = 0; // 0 = record every event; otherwise record every 2^lg events
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) {
    if (__builtin_expect(g_stat_rate_lg == 0, 1)) { stats_record_alloc(cls); return; }
    unsigned m = (1u << g_stat_rate_lg) - 1u;
    if (((++g_tls_stat_accum_alloc[cls]) & m) == 0u) stats_record_alloc(cls);
}
#else
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) { stats_record_alloc(cls); }
#endif
#define HAK_RET_ALLOC(cls, ptr) do { \
    tiny_debug_track_alloc_ret((cls), (ptr)); \
    hkm_stat_alloc((cls)); \
    ROUTE_COMMIT((cls), 0x7F); \
    return (ptr); \
} while(0)
#else
#define HAK_RET_ALLOC(cls, ptr) do { \
    tiny_debug_track_alloc_ret((cls), (ptr)); \
    ROUTE_COMMIT((cls), 0x7F); \
    return (ptr); \
} while(0)
#endif
#endif // HAKMEM_TINY_HEADER_CLASSIDX

// Free-side stats: compile-time zero when stats disabled
#ifdef HAKMEM_ENABLE_STATS
#define HAK_STAT_FREE(cls) do { stats_record_free((cls)); } while(0)
#else
#define HAK_STAT_FREE(cls) do { } while(0)
#endif