Files
hakmem/core/hakmem_tiny_init.inc

574 lines
23 KiB
PHP
Raw Normal View History

// hakmem_tiny_init.inc
// Phase 2D-2: Initialization function extraction
//
// This file contains the hak_tiny_init() function extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by 450 lines (24%).
//
// Cold path only - called once at startup.
void hak_tiny_init(void) {
if (g_tiny_initialized) return;
// Step 1: Simple initialization (static global is already zero-initialized)
g_tiny_initialized = 1;
// Reset fast-cache defaults and apply preset (if provided)
tiny_config_reset_defaults();
char* preset_env = getenv("HAKMEM_TINY_PRESET");
if (preset_env) {
if (strcasecmp(preset_env, "TIGHT") == 0) {
TINY_PRESET_TIGHT();
} else if (strcasecmp(preset_env, "ULTRA_TIGHT") == 0 ||
strcasecmp(preset_env, "ULTRATIGHT") == 0) {
TINY_PRESET_ULTRA_TIGHT();
} else {
TINY_PRESET_BALANCED();
}
}
// Phase 6.14: Read environment variable for Registry ON/OFF
char* env = getenv("HAKMEM_USE_REGISTRY");
if (env) {
g_use_registry = atoi(env);
} else {
g_use_registry = 1; // Default ON for multi-thread safety
}
// Phase 6.15: Runtime toggle to allow Tiny within wrappers
// HAKMEM_WRAP_TINY=1 → enable Tiny fast-path during wrapper calls
char* wrap_env = getenv("HAKMEM_WRAP_TINY");
if (wrap_env && atoi(wrap_env) != 0) {
g_wrap_tiny_enabled = 1;
}
char* wrap_refill_env = getenv("HAKMEM_WRAP_TINY_REFILL");
if (wrap_refill_env && atoi(wrap_refill_env) != 0) {
g_wrap_tiny_refill = 1;
}
// Remote-drain knobs
char* rth = getenv("HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD");
if (rth) { int v = atoi(rth); if (v > 0) g_remote_drain_thresh = v; }
char* rr = getenv("HAKMEM_TINY_REMOTE_DRAIN_TRYRATE");
if (rr) { int v = atoi(rr); if (v > 0) g_remote_drain_tryrate = v; }
char* cs = getenv("HAKMEM_TINY_COUNT_SAMPLE");
if (cs) { int v = atoi(cs); if (v>=0 && v<=16) g_tiny_count_sample_exp = v; }
int mem_diet_enabled = 1; // Default: Enable for memory efficiency
char* memdiet_env = getenv("HAKMEM_TINY_MEM_DIET");
if (memdiet_env && atoi(memdiet_env) == 0) {
mem_diet_enabled = 0; // Allow disabling via env
}
if (mem_diet_enabled) {
if (g_mag_cap_limit > 64) g_mag_cap_limit = 64;
}
// Optional: Magazine cap limit (runtime lower bound)
char* mag_env = getenv("HAKMEM_TINY_MAG_CAP");
if (mag_env) {
int val = atoi(mag_env);
if (val > 0 && val < g_mag_cap_limit) g_mag_cap_limit = val;
}
// Phase X: Initialize TLS free-list defaults
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
TinyTLSList* tls = &g_tls_lists[i];
tls->head = NULL;
tls->count = 0;
uint32_t base_cap = (uint32_t)tiny_default_cap(i);
uint32_t class_max = (uint32_t)tiny_cap_max_for_class(i);
if (base_cap > class_max) base_cap = class_max;
if ((uint32_t)g_mag_cap_limit < base_cap) base_cap = (uint32_t)g_mag_cap_limit;
if (g_mag_cap_override[i] > 0) {
uint32_t ov = (uint32_t)g_mag_cap_override[i];
if (ov > class_max) ov = class_max;
if (ov > (uint32_t)g_mag_cap_limit) ov = (uint32_t)g_mag_cap_limit;
if (ov != 0u) base_cap = ov;
}
if (base_cap == 0u) base_cap = 32u;
tls->cap = base_cap;
tls->refill_low = tiny_tls_default_refill(base_cap);
tls->spill_high = tiny_tls_default_spill(base_cap);
tiny_tls_publish_targets(i, base_cap);
}
if (mem_diet_enabled) {
tiny_apply_mem_diet();
}
feat: Phase 7 + Phase 2 - Massive performance & stability improvements Performance Achievements: - Tiny allocations: +180-280% (21M → 59-70M ops/s random mixed) - Single-thread: +24% (2.71M → 3.36M ops/s Larson) - 4T stability: 0% → 95% (19/20 success rate) - Overall: 91.3% of System malloc average (target was 40-55%) ✓ Phase 7 (Tasks 1-3): Core Optimizations - Task 1: Header validation removal (Region-ID direct lookup) - Task 2: Aggressive inline (TLS cache access optimization) - Task 3: Pre-warm TLS cache (eliminate cold-start penalty) Result: +180-280% improvement, 85-146% of System malloc Critical Bug Fixes: - Fix 64B allocation crash (size-to-class +1 for header) - Fix 4T wrapper recursion bugs (BUG #7, #8, #10, #11) - Remove malloc fallback (30% → 50% stability) Phase 2a: SuperSlab Dynamic Expansion (CRITICAL) - Implement mimalloc-style chunk linking - Unlimited slab expansion (no more OOM at 32 slabs) - Fix chunk initialization bug (bitmap=0x00000001 after expansion) Files: core/hakmem_tiny_superslab.c/h, core/superslab/superslab_types.h Result: 50% → 95% stability (19/20 4T success) Phase 2b: TLS Cache Adaptive Sizing - Dynamic capacity: 16-2048 slots based on usage - High-water mark tracking + exponential growth/shrink - Expected: +3-10% performance, -30-50% memory Files: core/tiny_adaptive_sizing.c/h (new) Phase 2c: BigCache Dynamic Hash Table - Migrate from fixed 256×8 array to dynamic hash table - Auto-resize: 256 → 512 → 1024 → 65,536 buckets - Improved hash function (FNV-1a) + collision chaining Files: core/hakmem_bigcache.c/h Expected: +10-20% cache hit rate Design Flaws Analysis: - Identified 6 components with fixed-capacity bottlenecks - SuperSlab (CRITICAL), TLS Cache (HIGH), BigCache/L2.5 (MEDIUM) - Report: DESIGN_FLAWS_ANALYSIS.md (11 chapters) Documentation: - 13 comprehensive reports (PHASE*.md, DESIGN_FLAWS*.md) - Implementation guides, test results, production readiness - Bug fix reports, root cause analysis Build System: - Makefile: phase7 targets, PREWARM_TLS flag - Auto dependency generation (-MMD -MP) for .inc files Known Issues: - 4T stability: 19/20 (95%) - investigating 1 failure for 100% - L2.5 Pool dynamic sharding: design only (needs 2-3 days integration) 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 17:08:00 +09:00
// Phase 2b: Initialize adaptive TLS cache sizing
adaptive_sizing_init();
// Enable signal-triggered stats dump if requested (SIGUSR1)
hak_tiny_enable_signal_dump();
// Phase 6.23: SuperSlab support (mimalloc-style fast allocation)
// Allow runtime disable/enable via env (0=off, 1=on)
Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP) ## Phase 6-2.3: Fix 4T Larson crash (active counter bug) ✅ **Problem:** 4T Larson crashed with "free(): invalid pointer", OOM errors **Root cause:** core/hakmem_tiny_refill_p0.inc.h:103 - P0 batch refill moved freelist blocks to TLS cache - Active counter NOT incremented → double-decrement on free - Counter underflows → SuperSlab appears full → OOM → crash **Fix:** Added ss_active_add(tls->ss, from_freelist); **Result:** 4T stable at 838K ops/s ✅ ## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks ✅ **Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV **Root cause #1:** core/box/hak_free_api.inc.h:92-95 - "Guess loop" dereferenced unmapped memory when registry lookup failed **Root cause #2:** core/box/hak_free_api.inc.h:115 - Header magic check dereferenced unmapped memory **Fix:** 1. Removed dangerous guess loop (lines 92-95) 2. Added hak_is_memory_readable() check before dereferencing header (core/hakmem_internal.h:277-294 - uses mincore() syscall) **Result:** - random_mixed (2KB): SEGV → 2.22M ops/s ✅ - random_mixed (4KB): SEGV → 2.58M ops/s ✅ - Larson 4T: no regression (838K ops/s) ✅ ## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️ **Problem:** Severe performance gaps (19-26x slower than system malloc) **Investigation:** Task agent identified root cause - hak_is_memory_readable() syscall overhead (100-300 cycles per free) - ALL frees hit unmapped_header_fallback path - SuperSlab lookup NEVER called - Why? g_use_superslab = 0 (disabled by diet mode) **Root cause:** core/hakmem_tiny_init.inc:104-105 - Diet mode (default ON) disables SuperSlab - SuperSlab defaults to 1 (hakmem_config.c:334) - BUT diet mode overrides it to 0 during init **Fix:** Separate SuperSlab from diet mode - SuperSlab: Performance-critical (fast alloc/free) - Diet mode: Memory efficiency (magazine capacity limits only) - Both are independent features, should not interfere **Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix - SuperSlab lookup now works (confirmed via debug output) - But benchmark crashes (Exit 139) after ~20 lookups - Needs further investigation **Files modified:** - core/hakmem_tiny_init.inc:99-109 - Removed diet mode override - PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap) **Next steps:** - Investigate new SEGV (likely SuperSlab free path bug) - OR: Revert Phase 6-2.5 changes if blocking progress 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 20:31:01 +09:00
// Phase 6-2.5 FIX: SuperSlab is independent from diet mode (both are performance-critical)
// - SuperSlab: Fast allocation/free (defaults to 1, set in hakmem_config.c:334)
// - Diet mode: Magazine capacity limits only (doesn't disable subsystems)
char* superslab_env = getenv("HAKMEM_TINY_USE_SUPERSLAB");
if (superslab_env) {
g_use_superslab = (atoi(superslab_env) != 0) ? 1 : 0;
}
Phase 6-2.3~6-2.5: Critical bug fixes + SuperSlab optimization (WIP) ## Phase 6-2.3: Fix 4T Larson crash (active counter bug) ✅ **Problem:** 4T Larson crashed with "free(): invalid pointer", OOM errors **Root cause:** core/hakmem_tiny_refill_p0.inc.h:103 - P0 batch refill moved freelist blocks to TLS cache - Active counter NOT incremented → double-decrement on free - Counter underflows → SuperSlab appears full → OOM → crash **Fix:** Added ss_active_add(tls->ss, from_freelist); **Result:** 4T stable at 838K ops/s ✅ ## Phase 6-2.4: Fix SEGV in random_mixed/mid_large_mt benchmarks ✅ **Problem:** bench_random_mixed_hakmem, bench_mid_large_mt_hakmem → immediate SEGV **Root cause #1:** core/box/hak_free_api.inc.h:92-95 - "Guess loop" dereferenced unmapped memory when registry lookup failed **Root cause #2:** core/box/hak_free_api.inc.h:115 - Header magic check dereferenced unmapped memory **Fix:** 1. Removed dangerous guess loop (lines 92-95) 2. Added hak_is_memory_readable() check before dereferencing header (core/hakmem_internal.h:277-294 - uses mincore() syscall) **Result:** - random_mixed (2KB): SEGV → 2.22M ops/s ✅ - random_mixed (4KB): SEGV → 2.58M ops/s ✅ - Larson 4T: no regression (838K ops/s) ✅ ## Phase 6-2.5: Performance investigation + SuperSlab fix (WIP) ⚠️ **Problem:** Severe performance gaps (19-26x slower than system malloc) **Investigation:** Task agent identified root cause - hak_is_memory_readable() syscall overhead (100-300 cycles per free) - ALL frees hit unmapped_header_fallback path - SuperSlab lookup NEVER called - Why? g_use_superslab = 0 (disabled by diet mode) **Root cause:** core/hakmem_tiny_init.inc:104-105 - Diet mode (default ON) disables SuperSlab - SuperSlab defaults to 1 (hakmem_config.c:334) - BUT diet mode overrides it to 0 during init **Fix:** Separate SuperSlab from diet mode - SuperSlab: Performance-critical (fast alloc/free) - Diet mode: Memory efficiency (magazine capacity limits only) - Both are independent features, should not interfere **Status:** ⚠️ INCOMPLETE - New SEGV discovered after fix - SuperSlab lookup now works (confirmed via debug output) - But benchmark crashes (Exit 139) after ~20 lookups - Needs further investigation **Files modified:** - core/hakmem_tiny_init.inc:99-109 - Removed diet mode override - PERFORMANCE_INVESTIGATION_REPORT.md - Task agent analysis (303x instruction gap) **Next steps:** - Investigate new SEGV (likely SuperSlab free path bug) - OR: Revert Phase 6-2.5 changes if blocking progress 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 20:31:01 +09:00
// Note: Diet mode no longer overrides g_use_superslab (removed lines 104-105)
// SuperSlab defaults to 1 unless explicitly disabled via env var
// One-shot hint: publish/adopt requires SuperSlab ON
{
static int hint_once = 0;
if (!hint_once) {
const char* must_adopt = getenv("HAKMEM_TINY_MUST_ADOPT");
if ((!superslab_env || g_use_superslab == 0) && must_adopt && atoi(must_adopt) != 0) {
fprintf(stderr, "[HINT] HAKMEM_TINY_USE_SUPERSLAB=0: publish/adopt pipeline is disabled. Set =1 for mailbox/adopt.\n");
}
hint_once = 1;
}
}
{
char* tlslist_env = getenv("HAKMEM_TINY_TLS_LIST");
if (tlslist_env) {
g_tls_list_enable = (atoi(tlslist_env) != 0) ? 1 : 0;
}
}
// Phase 9.4: TLS SLL toggle (default ON)
char* sll_env = getenv("HAKMEM_TINY_TLS_SLL");
if (sll_env && atoi(sll_env) == 0) {
g_tls_sll_enable = 0;
}
// Path debug enabled?
{
char* pd = getenv("HAKMEM_TINY_PATH_DEBUG");
g_path_debug_enabled = (pd && atoi(pd) != 0) ? 1 : 0;
}
// Ultra-Bump TLS shadow既定ON、envでOFF可能
{
char* ub = getenv("HAKMEM_TINY_BUMP_SHADOW");
if (ub) { g_ultra_bump_shadow = (atoi(ub) != 0) ? 1 : 0; }
char* bc = getenv("HAKMEM_TINY_BUMP_CHUNK");
if (bc) { int v = atoi(bc); if (v > 0 && v < 32768) g_bump_chunk = v; }
}
// Refill-one-on-missチェーン生成を避け、1個だけ確保して返す
{
char* ro = getenv("HAKMEM_TINY_REFILL_ONE_ON_MISS");
if (ro) g_refill_one_on_miss = (atoi(ro) != 0) ? 1 : 0;
}
// SLL multiplier (hot tiny classes)
char* sllmul = getenv("HAKMEM_SLL_MULTIPLIER");
if (sllmul) {
int v = atoi(sllmul);
if (v < 1) {
v = 1;
} else if (v > 16) {
v = 16; // guardrail
}
g_sll_multiplier = v;
}
// HotMag enable / tuning既定OFF, envでON可
{
char* hm = getenv("HAKMEM_TINY_HOTMAG");
if (hm) g_hotmag_enable = (atoi(hm) != 0) ? 1 : 0;
char* hmcap = getenv("HAKMEM_TINY_HOTMAG_CAP");
if (hmcap) {
int v = atoi(hmcap);
if (v < 16) v = 16;
else if (v > 1024) v = 1024;
g_hotmag_cap_default = v;
}
char* hmrefill = getenv("HAKMEM_TINY_HOTMAG_REFILL");
if (hmrefill) {
int v = atoi(hmrefill);
if (v < 0) v = 0;
if (v > g_hotmag_cap_default) v = g_hotmag_cap_default;
g_hotmag_refill_default = v;
}
if (g_hotmag_refill_default > g_hotmag_cap_default) {
g_hotmag_refill_default = g_hotmag_cap_default;
}
if (g_hotmag_refill_default < 0) g_hotmag_refill_default = 0;
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
uint16_t cap = hotmag_effective_cap(k);
g_hotmag_cap_current[k] = cap;
g_hotmag_cap_locked[k] = 0;
uint16_t refill = (uint16_t)g_hotmag_refill_default;
if (refill > cap) refill = cap;
g_hotmag_refill_current[k] = refill;
g_hotmag_refill_locked[k] = 0;
g_hotmag_class_en[k] = (k <= 3) ? 1 : 0;
}
// Heuristic defaults for the three hottest classes when not overridden
if (!g_hotmag_cap_locked[0]) {
uint16_t cap = g_hotmag_cap_current[0];
uint16_t cap_target = (g_hotmag_cap_default > 48) ? 48 : (uint16_t)g_hotmag_cap_default;
if (cap_target < 16) cap_target = 16;
if (cap_target < cap) g_hotmag_cap_current[0] = cap_target;
}
if (!g_hotmag_cap_locked[1]) {
uint16_t cap = g_hotmag_cap_current[1];
uint16_t cap_target = (g_hotmag_cap_default > 80) ? 80 : (uint16_t)g_hotmag_cap_default;
if (cap_target < 32) cap_target = 32;
if (cap_target < cap) g_hotmag_cap_current[1] = cap_target;
}
if (!g_hotmag_cap_locked[2]) {
uint16_t cap = g_hotmag_cap_current[2];
uint16_t cap_target = (g_hotmag_cap_default > 112) ? 112 : (uint16_t)g_hotmag_cap_default;
if (cap_target < 48) cap_target = 48;
if (cap_target < cap) g_hotmag_cap_current[2] = cap_target;
}
if (!g_hotmag_refill_locked[0]) {
g_hotmag_refill_current[0] = 0;
}
if (!g_hotmag_refill_locked[1]) {
uint16_t cap = g_hotmag_cap_current[1];
uint16_t ref = (g_hotmag_refill_default > 0) ? (uint16_t)g_hotmag_refill_default : 0;
if (ref > 0) {
uint16_t limit = (cap > 20) ? 20 : cap;
if (ref > limit) ref = limit;
if (ref > cap) ref = cap;
}
g_hotmag_refill_current[1] = ref;
}
if (!g_hotmag_refill_locked[2]) {
uint16_t cap = g_hotmag_cap_current[2];
uint16_t ref = (g_hotmag_refill_default > 0) ? (uint16_t)g_hotmag_refill_default : 0;
if (ref > 0) {
uint16_t limit = (cap > 40) ? 40 : cap;
if (ref > limit) ref = limit;
if (ref > cap) ref = cap;
}
g_hotmag_refill_current[2] = ref;
}
// Default: disable class 2 (32B) HotMag entirely unless explicitly enabled by env
if (!getenv("HAKMEM_TINY_HOTMAG_C2")) {
g_hotmag_class_en[2] = 0;
}
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
char key_cap[64];
snprintf(key_cap, sizeof(key_cap), "HAKMEM_TINY_HOTMAG_CAP_C%d", k);
char* cap_env = getenv(key_cap);
if (cap_env) {
int v = atoi(cap_env);
if (v < 16) v = 16;
else if (v > 1024) v = 1024;
g_hotmag_cap_current[k] = (uint16_t)v;
g_hotmag_cap_locked[k] = 1;
if (!g_hotmag_refill_locked[k] && g_hotmag_refill_current[k] > g_hotmag_cap_current[k]) {
g_hotmag_refill_current[k] = g_hotmag_cap_current[k];
}
}
char key_ref[64];
snprintf(key_ref, sizeof(key_ref), "HAKMEM_TINY_HOTMAG_REFILL_C%d", k);
char* ref_env = getenv(key_ref);
if (ref_env) {
int v = atoi(ref_env);
if (v < 0) v = 0;
if (v > g_hotmag_cap_current[k]) v = g_hotmag_cap_current[k];
g_hotmag_refill_current[k] = (uint16_t)v;
g_hotmag_refill_locked[k] = 1;
}
char key_en[64];
snprintf(key_en, sizeof(key_en), "HAKMEM_TINY_HOTMAG_C%d", k);
char* en_env = getenv(key_en);
if (en_env) {
g_hotmag_class_en[k] = (uint8_t)((atoi(en_env) != 0) ? 1 : 0);
}
}
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
if (g_hotmag_enable && hkm_is_hot_class(k)) {
g_tls_hot_mag[k].cap = g_hotmag_cap_current[k];
} else {
g_tls_hot_mag[k].cap = 0; // lazy init
}
g_tls_hot_mag[k].top = 0;
}
}
// Ultra-Simple front enable既定OFF, A/B用
{
char* us = getenv("HAKMEM_TINY_ULTRA_SIMPLE");
if (us) g_ultra_simple = (atoi(us) != 0) ? 1 : 0;
// zero-initialized by default
}
// Background Refill Bin既定OFF, A/B用
{
char* bb = getenv("HAKMEM_TINY_BG_BIN");
if (bb) g_bg_bin_enable = (atoi(bb) != 0) ? 1 : 0;
char* bt = getenv("HAKMEM_TINY_BG_TARGET");
if (bt) { int v = atoi(bt); if (v > 0 && v <= 4096) g_bg_bin_target = v; }
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
atomic_store_explicit(&g_bg_bin_head[k], (uintptr_t)0, memory_order_relaxed);
}
if (g_bg_bin_enable && !g_bg_bin_started) {
if (pthread_create(&g_bg_bin_thread, NULL, tiny_bg_refill_main, NULL) == 0) {
g_bg_bin_started = 1;
} else {
g_bg_bin_enable = 0; // disable on failure
}
}
}
// Background Spill/Drain (integrated into bg thread)
// EXTRACTED: bg_spill init moved to hakmem_tiny_bg_spill.c (Phase 2C-2)
{
bg_spill_init(); // Initialize bg_spill module from environment
// Remote target queue init (Phase 2C-1)
char* br = getenv("HAKMEM_TINY_BG_REMOTE");
if (br) g_bg_remote_enable = (atoi(br) != 0) ? 1 : 0;
char* rb = getenv("HAKMEM_TINY_BG_REMOTE_BATCH");
if (rb) { int v = atoi(rb); if (v > 0 && v <= 4096) g_bg_remote_batch = v; }
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
atomic_store_explicit(&g_remote_target_head[k], (uintptr_t)0, memory_order_relaxed);
atomic_store_explicit(&g_remote_target_len[k], 0u, memory_order_relaxed);
}
// bg thread already started above if bg_bin_enable=1; if only spill is enabled, start thread
if (g_bg_spill_enable && !g_bg_bin_started) {
if (pthread_create(&g_bg_bin_thread, NULL, tiny_bg_refill_main, NULL) == 0) {
g_bg_bin_started = 1;
g_bg_bin_enable = 1; // reuse loop
} else {
g_bg_spill_enable = 0;
}
}
}
// Optional prefetch enable
{
char* pf = getenv("HAKMEM_TINY_PREFETCH");
if (pf && atoi(pf) != 0) g_tiny_prefetch = 1;
}
// Refill batch tuning
char* rmax = getenv("HAKMEM_TINY_REFILL_MAX");
if (rmax) { int v = atoi(rmax); if (v > 0) g_tiny_refill_max = v; }
char* rmaxh = getenv("HAKMEM_TINY_REFILL_MAX_HOT");
if (rmaxh) { int v = atoi(rmaxh); if (v > 0) g_tiny_refill_max_hot = v; }
// Per-class overrides: HAKMEM_TINY_REFILL_MAX_C{0..7}, HAKMEM_TINY_REFILL_MAX_HOT_C{0..7}
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
char key1[64]; snprintf(key1, sizeof(key1), "HAKMEM_TINY_REFILL_MAX_C%d", k);
char* v1 = getenv(key1); if (v1) { int vv = atoi(v1); if (vv > 0) g_refill_max_c[k] = vv; }
char key2[64]; snprintf(key2, sizeof(key2), "HAKMEM_TINY_REFILL_MAX_HOT_C%d", k);
char* v2 = getenv(key2); if (v2) { int vv = atoi(v2); if (vv > 0) g_refill_max_hot_c[k] = vv; }
}
// Stats sampling rate (compile-time gated) via env HAKMEM_TINY_STAT_RATE_LG
#if defined(HAKMEM_ENABLE_STATS) && defined(HAKMEM_TINY_STAT_SAMPLING)
{
char* sr = getenv("HAKMEM_TINY_STAT_RATE_LG");
if (sr) { int lg = atoi(sr); if (lg >= 0 && lg <= 31) g_stat_rate_lg = lg; }
// 関数ポインタ選択(分岐をホットパスから排除)
g_stat_alloc_fn = (g_stat_rate_lg == 0) ? hkm_stat_alloc_always : hkm_stat_alloc_sampled;
}
#elif defined(HAKMEM_ENABLE_STATS)
// サンプリング未使用時は毎回更新
// FIXME: g_stat_alloc_fn and hkm_stat_alloc_always not yet implemented
// Stats are recorded via hkm_stat_alloc() in HAK_RET_ALLOC macro instead
// g_stat_alloc_fn = hkm_stat_alloc_always;
#endif
// Spill hysteresisfreeホットパスでgetenvしない
{
char* sh = getenv("HAKMEM_TINY_SPILL_HYST");
if (sh) { int v = atoi(sh); if (v < 0) v = 0; g_spill_hyst = v; }
}
char* ultra_env = getenv("HAKMEM_TINY_ULTRA");
if (ultra_env && atoi(ultra_env) != 0) {
g_tiny_ultra = 1;
}
char* uval = getenv("HAKMEM_TINY_ULTRA_VALIDATE");
if (uval && atoi(uval) != 0) {
g_ultra_validate = 1;
}
// Ultra env overrides: per-class batch and sll_cap
// HAKMEM_TINY_ULTRA_BATCH_C{0..7}, HAKMEM_TINY_ULTRA_SLL_CAP_C{0..7}
char var[64];
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
snprintf(var, sizeof(var), "HAKMEM_TINY_ULTRA_BATCH_C%d", i);
char* vb = getenv(var);
if (vb) { int v = atoi(vb); if (v > 0) g_ultra_batch_override[i] = v; }
snprintf(var, sizeof(var), "HAKMEM_TINY_ULTRA_SLL_CAP_C%d", i);
char* vc = getenv(var);
if (vc) { int v = atoi(vc); if (v > 0) g_ultra_sll_cap_override[i] = v; }
// Normal-path per-class overrides
snprintf(var, sizeof(var), "HAKMEM_TINY_MAG_CAP_C%d", i);
char* vm = getenv(var);
if (vm) { int v = atoi(vm); if (v > 0 && v <= TINY_TLS_MAG_CAP) g_mag_cap_override[i] = v; }
snprintf(var, sizeof(var), "HAKMEM_TINY_SLL_CAP_C%d", i);
char* vs = getenv(var);
if (vs) { int v = atoi(vs); if (v > 0 && v <= TINY_TLS_MAG_CAP) g_sll_cap_override[i] = v; }
// Front refill count per-class override (fast path tuning)
snprintf(var, sizeof(var), "HAKMEM_TINY_REFILL_COUNT_C%d", i);
char* rc = getenv(var);
if (rc) { int v = atoi(rc); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_class[i] = v; }
}
// Front refill count globals
{
char* g = getenv("HAKMEM_TINY_REFILL_COUNT");
if (g) { int v = atoi(g); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_global = v; }
char* h = getenv("HAKMEM_TINY_REFILL_COUNT_HOT");
if (h) { int v = atoi(h); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_hot = v; }
char* m = getenv("HAKMEM_TINY_REFILL_COUNT_MID");
if (m) { int v = atoi(m); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_mid = v; }
}
// Sensible default for class 7 (1024B): favor larger refill to reduce refills/syscalls
if (g_refill_count_class[7] == 0) {
g_refill_count_class[7] = 64; // can be overridden by env HAKMEM_TINY_REFILL_COUNT_C7
}
{
char* fast_env = getenv("HAKMEM_TINY_FAST");
if (fast_env && atoi(fast_env) == 0) g_fast_enable = 0;
int fast_global = -1;
char* fast_cap_env = getenv("HAKMEM_TINY_FAST_CAP");
if (fast_cap_env) {
int v = atoi(fast_cap_env);
if (v >= 0 && v <= TINY_TLS_MAG_CAP) fast_global = v;
}
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
uint16_t cap = g_fast_cap_defaults[i];
if (fast_global >= 0) cap = (uint16_t)fast_global;
snprintf(var, sizeof(var), "HAKMEM_TINY_FAST_CAP_C%d", i);
char* fc = getenv(var);
if (fc) {
int v = atoi(fc);
if (v < 0) v = 0;
if (v > TINY_TLS_MAG_CAP) v = TINY_TLS_MAG_CAP;
cap = (uint16_t)v;
g_fast_cap_locked[i] = 1;
} else if (fast_global >= 0) {
g_fast_cap_locked[i] = 1;
} else {
g_fast_cap_locked[i] = 0;
}
g_fast_cap[i] = cap;
}
}
{
const char* dbg_fast = getenv("HAKMEM_TINY_DEBUG_FAST0");
if (dbg_fast && atoi(dbg_fast) != 0) {
g_debug_fast0 = 1;
g_fast_enable = 0;
g_hotmag_enable = 0;
g_tls_list_enable = 0;
}
const char* dbg_remote = getenv("HAKMEM_TINY_DEBUG_REMOTE_GUARD");
if (dbg_remote && atoi(dbg_remote) != 0) {
g_debug_remote_guard = 1;
}
const char* rf_force = getenv("HAKMEM_TINY_RF_FORCE_NOTIFY");
if (rf_force && atoi(rf_force) != 0) {
extern int g_remote_force_notify;
g_remote_force_notify = 1;
}
const char* safe_free = getenv("HAKMEM_SAFE_FREE");
if (safe_free && atoi(safe_free) != 0) {
extern int g_tiny_safe_free; g_tiny_safe_free = 1;
}
const char* safe_free_strict = getenv("HAKMEM_SAFE_FREE_STRICT");
if (safe_free_strict && atoi(safe_free_strict) != 0) {
extern int g_tiny_safe_free_strict; g_tiny_safe_free_strict = 1;
}
const char* force_remote = getenv("HAKMEM_TINY_FORCE_REMOTE");
if (force_remote && atoi(force_remote) != 0) {
extern int g_tiny_force_remote; g_tiny_force_remote = 1;
}
// Remote side-table (debug only)
tiny_remote_side_init_from_env();
}
static int g_super_trace = -1;
if (__builtin_expect(g_super_trace == -1, 0)) {
const char* tr = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
g_super_trace = (tr && atoi(tr) != 0) ? 1 : 0;
}
if (g_super_trace) {
static int logged_once = 0;
if (!logged_once) {
fprintf(stderr, "[SUPERTRACE] mem_diet=%d env=%s g_use_superslab=%d fast_enable=%d cap0=%u cap1=%u cap2=%u cap3=%u cap4=%u reslist=%d\n",
mem_diet_enabled,
superslab_env ? superslab_env : "(null)",
g_use_superslab,
g_fast_enable,
(unsigned)g_fast_cap[0],
(unsigned)g_fast_cap[1],
(unsigned)g_fast_cap[2],
(unsigned)g_fast_cap[3],
(unsigned)g_fast_cap[4],
g_tls_list_enable);
logged_once = 1;
}
}
tiny_ace_init_defaults();
char* fc_env = getenv("HAKMEM_TINY_FASTCACHE");
if (fc_env && atoi(fc_env) != 0) {
g_fastcache_enable = 1;
}
char* fe_env = getenv("HAKMEM_TINY_FRONTEND");
if (fe_env && atoi(fe_env) != 0) {
g_frontend_enable = 1;
}
// TinyQuickSlot opt-in
{
char* q = getenv("HAKMEM_TINY_QUICK");
if (q && atoi(q) != 0) g_quick_enable = 1;
}
tiny_obs_start_if_needed();
// Deferred Intelligence Engine
char* ie = getenv("HAKMEM_INT_ENGINE");
if (ie && atoi(ie) != 0) {
g_int_engine = 1;
// Initialize frontend fill targets to zero (let engine grow if hot)
for (int i = 0; i < TINY_NUM_CLASSES; i++) atomic_store(&g_frontend_fill_target[i], 0);
// Event logging knobs (optional)
char* its = getenv("HAKMEM_INT_EVENT_TS");
if (its && atoi(its) != 0) g_int_event_ts = 1;
char* ism = getenv("HAKMEM_INT_SAMPLE");
if (ism) { int n = atoi(ism); if (n > 0 && n < 31) g_int_sample_mask = ((1u << n) - 1u); }
if (pthread_create(&g_int_thread, NULL, intelligence_engine_main, NULL) == 0) {
g_int_started = 1;
}
}
// Step 2: Initialize Slab Registry (only if enabled)
if (g_use_registry) {
memset(g_slab_registry, 0, sizeof(g_slab_registry));
}
// Initialize per-class locks
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
pthread_mutex_init(&g_tiny_class_locks[i].m, NULL);
}
// Phase 8.3: Initialize ACE (Adaptive Cache Engine) state
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
g_ss_ace[i].current_lg = 20; // Start with 1MB SuperSlabs
g_ss_ace[i].target_lg = 20; // Default to 1MB
g_ss_ace[i].hot_score = 0;
g_ss_ace[i].alloc_count = 0;
g_ss_ace[i].refill_count = 0;
g_ss_ace[i].spill_count = 0;
g_ss_ace[i].live_blocks = 0;
g_ss_ace[i].last_tick_ns = 0;
}
// Lite P1: Pre-allocate Tier 1 (8-64B) hot classes only
// This avoids initialization overhead for common small allocations
// Classes 0-3: 8B, 16B, 32B, 64B (256KB total, not 512KB)
for (int class_idx = 0; class_idx < 4; class_idx++) {
TinySlab* slab = allocate_new_slab(class_idx);
if (slab) {
slab->next = g_tiny_pool.free_slabs[class_idx];
g_tiny_pool.free_slabs[class_idx] = slab;
}
}
CRITICAL FIX: TLS 未初期化による 4T SEGV を完全解消 **問題:** - Larson 4T で 100% SEGV (1T は 2.09M ops/s で完走) - System/mimalloc は 4T で 33.52M ops/s 正常動作 - SS OFF + Remote OFF でも 4T で SEGV **根本原因: (Task agent ultrathink 調査結果)** ``` CRASH: mov (%r15),%r13 R15 = 0x6261 ← ASCII "ba" (ゴミ値、未初期化TLS) ``` Worker スレッドの TLS 変数が未初期化: - `__thread void* g_tls_sll_head[TINY_NUM_CLASSES];` ← 初期化なし - pthread_create() で生成されたスレッドでゼロ初期化されない - NULL チェックが通過 (0x6261 != NULL) → dereference → SEGV **修正内容:** 全 TLS 配列に明示的初期化子 `= {0}` を追加: 1. **core/hakmem_tiny.c:** - `g_tls_sll_head[TINY_NUM_CLASSES] = {0}` - `g_tls_sll_count[TINY_NUM_CLASSES] = {0}` - `g_tls_live_ss[TINY_NUM_CLASSES] = {0}` - `g_tls_bcur[TINY_NUM_CLASSES] = {0}` - `g_tls_bend[TINY_NUM_CLASSES] = {0}` 2. **core/tiny_fastcache.c:** - `g_tiny_fast_cache[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_count[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_head[TINY_FAST_CLASS_COUNT] = {0}` - `g_tiny_fast_free_count[TINY_FAST_CLASS_COUNT] = {0}` 3. **core/hakmem_tiny_magazine.c:** - `g_tls_mags[TINY_NUM_CLASSES] = {0}` 4. **core/tiny_sticky.c:** - `g_tls_sticky_ss[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_idx[TINY_NUM_CLASSES][TINY_STICKY_RING] = {0}` - `g_tls_sticky_pos[TINY_NUM_CLASSES] = {0}` **効果:** ``` Before: 1T: 2.09M ✅ | 4T: SEGV 💀 After: 1T: 2.41M ✅ | 4T: 4.19M ✅ (+15% 1T, SEGV解消) ``` **テスト:** ```bash # 1 thread: 完走 ./larson_hakmem 2 8 128 1024 1 12345 1 → Throughput = 2,407,597 ops/s ✅ # 4 threads: 完走(以前は SEGV) ./larson_hakmem 2 8 128 1024 1 12345 4 → Throughput = 4,192,155 ops/s ✅ ``` **調査協力:** Task agent (ultrathink mode) による完璧な根本原因特定 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:27:04 +09:00
if (__builtin_expect(route_enabled_runtime(), 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_ROUTE, (uint16_t)0xFFFFu, NULL, (uintptr_t)0x494E4954u);
}
}