Files
hakmem/core/hakmem_tiny_config_box.inc
Moe Charm (CI) a78224123e Fix C0/C7 class confusion: Upgrade C7 stride to 2048B and fix meta->class_idx initialization
Root Cause:
1. C7 stride was 1024B, unable to serve 1024B user requests (need 1025B with header)
2. New SuperSlabs start with meta->class_idx=0 (mmap zero-init)
3. superslab_init_slab() only sets class_idx if meta->class_idx==255
4. Multiple code paths used conditional assignment (if class_idx==255), leaving C7 slabs with class_idx=0
5. This caused C7 blocks to be misidentified as C0, leading to HDR_META_MISMATCH errors

Changes:
1. Upgrade C7 stride: 1024B → 2048B (can now serve 1024B requests)
2. Update blocks_per_slab[7]: 64 → 32 (64KB slab / 2048B stride)
3. Update size-to-class LUT: entries 513-2048 now map to C7
4. Fix superslab_init_slab() fail-safe: only reinitialize if class_idx==255 (not 0)
5. Add explicit class_idx assignment in 6 initialization paths:
   - tiny_superslab_alloc.inc.h: superslab_refill() after init
   - hakmem_tiny_superslab.c: backend_shared after init (main path)
   - ss_unified_backend_box.c: unconditional assignment
   - ss_legacy_backend_box.c: explicit assignment
   - superslab_expansion_box.c: explicit assignment
   - ss_allocation_box.c: fail-safe condition fix

Fix P0 refill bug:
- Update obsolete array access after Phase 3d-B TLS SLL unification
- g_tls_sll_head[cls] → g_tls_sll[cls].head
- g_tls_sll_count[cls] → g_tls_sll[cls].count

Results:
- HDR_META_MISMATCH: eliminated (0 errors in 100K iterations)
- 1024B allocations now routed to C7 (Tiny fast path)
- NXT_MISALIGN warnings remain (legacy 1024B SuperSlabs, separate issue)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-21 13:44:05 +09:00

212 lines
8.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_tiny_config_box.inc
// Box: Tiny allocator configuration, debug counters, and return helpers.
// Extracted from hakmem_tiny.c to reduce file size and isolate config logic.
// ============================================================================
// Size class table (Box 3 dependency)
// ============================================================================
// Per-class block stride in bytes, indexed by Tiny class number.
// Phase E1-CORRECT: ALL classes have 1-byte header
// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B]
// Usable data = stride - 1 (implicit)
// Note: strides double from class 0 to class 6, then jump from 512B straight to
// 2048B for class 7 (there is no 1024B stride).
// The class_to_max_size table inside tiny_get_max_size() holds the matching
// "stride - 1" values and must be kept in sync with this table.
const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
8, // Class 0: 8B total = [Header 1B][Data 7B]
16, // Class 1: 16B total = [Header 1B][Data 15B]
32, // Class 2: 32B total = [Header 1B][Data 31B]
64, // Class 3: 64B total = [Header 1B][Data 63B]
128, // Class 4: 128B total = [Header 1B][Data 127B]
256, // Class 5: 256B total = [Header 1B][Data 255B]
512, // Class 6: 512B total = [Header 1B][Data 511B]
2048 // Class 7: 2048B total = [Header 1B][Data 2047B] (upgraded for 1024B requests)
};
// ============================================================================
// Phase 16: Dynamic Tiny Max Size (ENV: HAKMEM_TINY_MAX_CLASS)
// Phase 17-1: Auto-adjust when Small-Mid enabled
// ============================================================================
// Forward declaration for Small-Mid check
extern bool smallmid_is_enabled(void);

// Returns the largest usable request size (stride - 1) that the Tiny allocator
// serves, derived from the highest enabled size class.
//
// - ENV HAKMEM_TINY_MAX_CLASS selects the highest class (0 .. TINY_NUM_CLASSES-1).
//   It is read once, on the first call, and the result is cached.
//   Unset, empty, non-numeric or out-of-range values fall back to class 7.
// - Default: 2047B (C0-C7); can be reduced, e.g. to 255B (C0-C5).
// - Phase 17-1: when smallmid_is_enabled() returns true the class is capped at
//   C5 (255B). This check runs on every call; only the ENV parse is cached.
size_t tiny_get_max_size(void) {
    static int g_max_class = -1;  // -1 = ENV not parsed yet
    if (__builtin_expect(g_max_class == -1, 0)) {
        int max_class = 7;  // Default: all classes (C0-C7)
        const char* env = getenv("HAKMEM_TINY_MAX_CLASS");
        if (env && *env) {
            // strtol (unlike atoi) reports "no digits parsed" through endptr and
            // has defined behavior on overflow, so garbage such as "abc" keeps
            // the default instead of silently selecting class 0.
            char* end = NULL;
            long parsed = strtol(env, &end, 10);
            if (end != env && parsed >= 0 && parsed < TINY_NUM_CLASSES) {
                max_class = (int)parsed;
            }
        }
        g_max_class = max_class;
    }

    // Phase 17-1: Auto-adjust when Small-Mid enabled
    // Small-Mid handles 256B-1KB, so Tiny should only handle 0-255B
    int effective_class = g_max_class;
    if (smallmid_is_enabled() && effective_class > 5) {
        effective_class = 5;  // Limit to C0-C5 (0-255B)
    }

    // Map class to max usable size (stride - 1)
    // C0=8B, C1=16B, C2=32B, C3=64B, C4=128B, C5=256B, C6=512B, C7=2048B
    static const size_t class_to_max_size[TINY_NUM_CLASSES] = {
        7, 15, 31, 63, 127, 255, 511, 2047
    };
    return class_to_max_size[effective_class];
}
// ============================================================================
// PRIORITY 1-4: Integrity Check Counters
// ============================================================================
// Four atomic 64-bit counters, all starting at 0. Going by their names they
// cover class-bounds, freelist, canary and header checks respectively; the code
// that updates and reports them is not in this section.
// NOTE(review): "PRIORITY 1-4" presumably maps to the four counters in the
// order declared - confirm against the integrity-check implementation.
_Atomic uint64_t g_integrity_check_class_bounds = 0;
_Atomic uint64_t g_integrity_check_freelist = 0;
_Atomic uint64_t g_integrity_check_canary = 0;
_Atomic uint64_t g_integrity_check_header = 0;
// Build-time gate for debug counters (path/ultra). Default OFF.
// Selects between the counting and the no-op definitions of HAK_PATHDBG_INC,
// HAK_ULTRADBG_INC and HAK_DBG_INC further down in this file.
#ifndef HAKMEM_DEBUG_COUNTERS
#define HAKMEM_DEBUG_COUNTERS 0
#endif
// Runtime debug switches, all defaulting to 0. Where they are set and read is
// not visible in this section.
int g_debug_fast0 = 0;
int g_debug_remote_guard = 0;
int g_remote_force_notify = 0;
// Tiny free safety (debug)
int g_tiny_safe_free = 0; // Default OFF for performance; env: HAKMEM_SAFE_FREE=1 to enable
int g_tiny_safe_free_strict = 0; // env: HAKMEM_SAFE_FREE_STRICT=1
int g_tiny_force_remote = 0; // env: HAKMEM_TINY_FORCE_REMOTE=1
// Reports whether SuperSlab tracing is switched on through the environment.
// HAKMEM_TINY_SUPERSLAB_TRACE is consulted on the first call only; the resulting
// 0/1 answer is kept in a function-local static and returned by later calls.
// Tracing counts as enabled when the variable is set and atoi() of its value is
// non-zero.
static inline int superslab_trace_enabled(void) {
    static int cached_flag = -1;  // -1 = environment not consulted yet
    if (__builtin_expect(cached_flag != -1, 1)) {
        return cached_flag;
    }
    const char* raw = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
    int enabled = 0;
    if (raw && atoi(raw) != 0) {
        enabled = 1;
    }
    cached_flag = enabled;
    return cached_flag;
}
// Build-time gate: Minimal Tiny front (bench-only)
// HAKMEM_TINY_MINIMAL_FRONT (build-time gate):
// When enabled, physically excludes optional front tiers from the hot path
// (UltraFront/Quick/Frontend/HotMag/SS-try/BumpShadow), leaving:
// SLL → TLS Magazine → SuperSlab → (remaining slow path)
// Defaults to 1 (enabled) here unless the build defines it beforehand.
#ifndef HAKMEM_TINY_MINIMAL_FRONT
#define HAKMEM_TINY_MINIMAL_FRONT 1
#endif
// Strict front: compile-out optional front tiers but keep baseline structure intact
// Defaults to 0 (disabled) here unless the build defines it beforehand.
#ifndef HAKMEM_TINY_STRICT_FRONT
#define HAKMEM_TINY_STRICT_FRONT 0
#endif
// Bench-only fast path knobs (defaults)
// HAKMEM_TINY_BENCH_REFILL is the common refill value (8 unless overridden at
// build time); the code that consumes it is not in this section.
#ifndef HAKMEM_TINY_BENCH_REFILL
#define HAKMEM_TINY_BENCH_REFILL 8
#endif
// Optional per-class overrides (bench-only)
// Each override falls back to HAKMEM_TINY_BENCH_REFILL when not defined by the build.
// NOTE(review): the 8/16/32/64 suffixes presumably name the block sizes of the
// four smallest classes - confirm at the use sites.
#ifndef HAKMEM_TINY_BENCH_REFILL8
#define HAKMEM_TINY_BENCH_REFILL8 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL16
#define HAKMEM_TINY_BENCH_REFILL16 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL32
#define HAKMEM_TINY_BENCH_REFILL32 HAKMEM_TINY_BENCH_REFILL
#endif
#ifndef HAKMEM_TINY_BENCH_REFILL64
#define HAKMEM_TINY_BENCH_REFILL64 HAKMEM_TINY_BENCH_REFILL
#endif
// Bench-only warmup amounts (pre-fill TLS SLL on first alloc per class)
// Defaults: 64 / 96 / 160 / 192 for the 8 / 16 / 32 / 64 variants; each can be
// overridden by defining the macro before this file is included.
#ifndef HAKMEM_TINY_BENCH_WARMUP8
#define HAKMEM_TINY_BENCH_WARMUP8 64
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP16
#define HAKMEM_TINY_BENCH_WARMUP16 96
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP32
#define HAKMEM_TINY_BENCH_WARMUP32 160
#endif
#ifndef HAKMEM_TINY_BENCH_WARMUP64
#define HAKMEM_TINY_BENCH_WARMUP64 192
#endif
// Per-thread array of 4 bytes, present only when HAKMEM_TINY_BENCH_FASTPATH is
// defined (tested with #ifdef, so any definition - even 0 - enables it).
// NOTE(review): presumably one "warmup done" flag for each of the four bench
// sizes above - confirm against the bench fast path.
#ifdef HAKMEM_TINY_BENCH_FASTPATH
static __thread unsigned char g_tls_bench_warm_done[4];
#endif
// Array-counter debug increments, selected by HAKMEM_DEBUG_COUNTERS.
// Enabled:
//   HAK_PATHDBG_INC(arr, idx)  - increments arr[idx] only while g_path_debug_enabled
//                                is non-zero (that flag is not defined in this section).
//   HAK_ULTRADBG_INC(arr, idx) - increments arr[idx] unconditionally.
// Disabled: both expand to (void)(idx); arr is not referenced, but the idx
// expression is still evaluated.
#if HAKMEM_DEBUG_COUNTERS
#define HAK_PATHDBG_INC(arr, idx) do { if (g_path_debug_enabled) { (arr)[(idx)]++; } } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (arr)[(idx)]++; } while(0)
#else
#define HAK_PATHDBG_INC(arr, idx) do { (void)(idx); } while(0)
#define HAK_ULTRADBG_INC(arr, idx) do { (void)(idx); } while(0)
#endif
// Simple scalar debug increment (no-op when HAKMEM_DEBUG_COUNTERS=0)
// In the disabled form the argument is not evaluated at all.
#if HAKMEM_DEBUG_COUNTERS
#define HAK_DBG_INC(var) do { (var)++; } while(0)
#else
#define HAK_DBG_INC(var) do { (void)0; } while(0)
#endif
// Forward declaration of the debug tracking hook called by the legacy
// HAK_RET_ALLOC variants below (its definition is not in this section).
static inline void tiny_debug_track_alloc_ret(int cls, void* ptr);
// ========== HAK_RET_ALLOC: Single Definition Point ==========
// Allocation return helper. Every variant expands to code containing a `return`
// statement, so invoking the macro exits the calling function with the pointer.
// Choose implementation based on HAKMEM_TINY_HEADER_CLASSIDX
// - Phase 7 enabled: Write header and return user pointer
// - Phase 7 disabled: Legacy behavior (stats + route + return)
#if HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_BUILD_RELEASE
// Phase E1-CORRECT: ALL classes have 1-byte headers (including C7)
// Ultra-fast inline macro (3-4 instructions)
// Stores HEADER_MAGIC | (cls & HEADER_CLASS_MASK) into the byte at base_ptr and
// returns base_ptr + 1 as the user pointer.
// NOTE(review): base_ptr is expanded twice, so callers must not pass an
// expression with side effects.
#define HAK_RET_ALLOC(cls, base_ptr) do { \
*(uint8_t*)(base_ptr) = HEADER_MAGIC | ((cls) & HEADER_CLASS_MASK); \
return (void*)((uint8_t*)(base_ptr) + 1); \
} while(0)
#else
// Debug: Keep full validation via tiny_region_id_write_header()
// Returns whatever tiny_region_id_write_header(ptr, cls) returns.
#define HAK_RET_ALLOC(cls, ptr) return tiny_region_id_write_header((ptr), (cls))
#endif
#else
// Legacy: Stats and routing before return
#ifdef HAKMEM_ENABLE_STATS
// Optional: enabled in sampling builds. The hot path uses a direct inline call
// (no indirect branch).
#ifdef HAKMEM_TINY_STAT_SAMPLING
static __thread unsigned g_tls_stat_accum_alloc[TINY_NUM_CLASSES];
static int g_stat_rate_lg = 0; // 0 = record every call; otherwise record once every 2^lg calls
// Records an allocation for class `cls`, sampled per thread: with rate 0 every
// call is recorded; otherwise the per-thread, per-class counter is incremented
// and a record is emitted only when its low g_stat_rate_lg bits are all zero.
// NOTE(review): nothing in this section assigns g_stat_rate_lg, so as shown it stays 0.
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) {
if (__builtin_expect(g_stat_rate_lg == 0, 1)) { stats_record_alloc(cls); return; }
unsigned m = (1u << g_stat_rate_lg) - 1u;
if (((++g_tls_stat_accum_alloc[cls]) & m) == 0u) stats_record_alloc(cls);
}
#else
// Unsampled variant: forwards every call to stats_record_alloc().
static inline __attribute__((always_inline)) void hkm_stat_alloc(int cls) { stats_record_alloc(cls); }
#endif
// Legacy with stats: debug tracking hook, stats, ROUTE_COMMIT(cls, 0x7F), then return ptr.
// cls and ptr are expanded more than once.
#define HAK_RET_ALLOC(cls, ptr) do { \
tiny_debug_track_alloc_ret((cls), (ptr)); \
hkm_stat_alloc((cls)); \
ROUTE_COMMIT((cls), 0x7F); \
return (ptr); \
} while(0)
#else
// Legacy without stats: debug tracking hook, ROUTE_COMMIT(cls, 0x7F), then return ptr.
// cls and ptr are expanded more than once.
#define HAK_RET_ALLOC(cls, ptr) do { \
tiny_debug_track_alloc_ret((cls), (ptr)); \
ROUTE_COMMIT((cls), 0x7F); \
return (ptr); \
} while(0)
#endif
#endif // HAKMEM_TINY_HEADER_CLASSIDX
// Free-side stats: compile-time zero when stats disabled
// With HAKMEM_ENABLE_STATS defined, HAK_STAT_FREE(cls) calls stats_record_free(cls);
// otherwise it expands to an empty statement and cls is not evaluated.
#ifdef HAKMEM_ENABLE_STATS
#define HAK_STAT_FREE(cls) do { stats_record_free((cls)); } while(0)
#else
#define HAK_STAT_FREE(cls) do { } while(0)
#endif