Files
hakmem/core/hakmem_tiny_init.inc
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

448 lines
18 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// hakmem_tiny_init.inc
// Note: uses TLS ops inline helpers for prewarm when class5 hotpath is enabled
#include "hakmem_tiny_tls_ops.h"
#include "box/prewarm_box.h" // Box Prewarm API (Priority 3)
#include "box/tiny_route_box.h"
// Phase 2D-2: Initialization function extraction
//
// This file contains the hak_tiny_init() function extracted from hakmem_tiny.c
// to improve code organization. Reduces main file by 450 lines (24%).
//
// Cold path only - called once at startup.
// Fallback hook: certain build configurations reference this function without
// supplying their own implementation. This empty definition keeps non-debug
// builds linking when the optional signal-dump support is absent.
static inline void hak_tiny_enable_signal_dump(void)
{
    /* intentionally empty */
}
// hak_tiny_init: one-time startup configuration of the Tiny allocator front end.
//
// Takes no arguments and returns nothing. The first call sets g_tiny_initialized
// and then, in order:
//   1. pins the route snapshot, resets fast-cache defaults and applies a preset,
//   2. reads the HAKMEM_* environment knobs into their globals,
//   3. seeds per-class TLS list caps, fast caps, locks, ACE state and the registry,
//   4. pre-allocates one slab for each of classes 0-3 and, when SuperSlab is
//      enabled, initializes the SuperSlab registry, LRU cache and prewarm.
// Subsequent calls return immediately.
//
// Numeric knobs are parsed with atoi(), so non-numeric text reads as 0.
// NOTE(review): g_tiny_initialized is set before the rest of the setup runs and
// no lock is taken in this function; if callers can race on first use, a second
// thread could see the flag and continue with partially initialized state.
// Confirm how callers serialize the first call.
void hak_tiny_init(void) {
// Already initialized: nothing to do.
if (g_tiny_initialized) return;
// Step 1: Simple initialization (static global is already zero-initialized)
g_tiny_initialized = 1;
// Fix the route snapshot (TinyHeap vs Legacy) at startup
tiny_route_snapshot_init();
// Reset fast-cache defaults and apply preset (if provided)
tiny_config_reset_defaults();
// HAKMEM_TINY_PRESET is matched case-insensitively. TIGHT and
// ULTRA_TIGHT/ULTRATIGHT select their presets; any other value selects
// BALANCED. When the variable is unset no preset macro runs.
char* preset_env = getenv("HAKMEM_TINY_PRESET");
if (preset_env) {
if (strcasecmp(preset_env, "TIGHT") == 0) {
TINY_PRESET_TIGHT();
} else if (strcasecmp(preset_env, "ULTRA_TIGHT") == 0 ||
strcasecmp(preset_env, "ULTRATIGHT") == 0) {
TINY_PRESET_ULTRA_TIGHT();
} else {
TINY_PRESET_BALANCED();
}
}
// Phase 6.14: Read environment variable for Registry ON/OFF
// The raw atoi() result is stored, so any non-zero value enables the registry.
char* env = getenv("HAKMEM_USE_REGISTRY");
if (env) {
g_use_registry = atoi(env);
} else {
g_use_registry = 1; // Default ON for multi-thread safety
}
// Phase 6.15: Runtime toggle to allow Tiny within wrappers
// HAKMEM_WRAP_TINY=1 → enable Tiny fast-path during wrapper calls
// These two toggles can only turn the feature on; an unset or zero value
// leaves the existing global untouched.
char* wrap_env = getenv("HAKMEM_WRAP_TINY");
if (wrap_env && atoi(wrap_env) != 0) {
g_wrap_tiny_enabled = 1;
}
char* wrap_refill_env = getenv("HAKMEM_WRAP_TINY_REFILL");
if (wrap_refill_env && atoi(wrap_refill_env) != 0) {
g_wrap_tiny_refill = 1;
}
// Remote-drain knobs (only strictly positive values are accepted)
char* rth = getenv("HAKMEM_TINY_REMOTE_DRAIN_THRESHOLD");
if (rth) { int v = atoi(rth); if (v > 0) g_remote_drain_thresh = v; }
char* rr = getenv("HAKMEM_TINY_REMOTE_DRAIN_TRYRATE");
if (rr) { int v = atoi(rr); if (v > 0) g_remote_drain_tryrate = v; }
// Count sampling exponent: accepted range is 0..16, other values are ignored.
char* cs = getenv("HAKMEM_TINY_COUNT_SAMPLE");
if (cs) { int v = atoi(cs); if (v>=0 && v<=16) g_tiny_count_sample_exp = v; }
// Memory diet: on unless HAKMEM_TINY_MEM_DIET parses to 0. When on, the
// global magazine cap limit is clamped to at most 64.
int mem_diet_enabled = 1; // Default: Enable for memory efficiency
char* memdiet_env = getenv("HAKMEM_TINY_MEM_DIET");
if (memdiet_env && atoi(memdiet_env) == 0) {
mem_diet_enabled = 0; // Allow disabling via env
}
if (mem_diet_enabled) {
if (g_mag_cap_limit > 64) g_mag_cap_limit = 64;
}
// Optional: Magazine cap limit (runtime lower bound)
// The env value can only lower g_mag_cap_limit, never raise it.
char* mag_env = getenv("HAKMEM_TINY_MAG_CAP");
if (mag_env) {
int val = atoi(mag_env);
if (val > 0 && val < g_mag_cap_limit) g_mag_cap_limit = val;
}
// Phase X: Initialize TLS free-list defaults
// For each class: empty the list, then derive the cap from the class default,
// clamped to the class maximum and to g_mag_cap_limit.
// NOTE(review): g_mag_cap_override[] is consumed here, but the
// HAKMEM_TINY_MAG_CAP_C<n> environment variables are only parsed further down
// in this function. Unless the array is populated elsewhere beforehand, those
// env overrides cannot influence the caps computed in this loop. Confirm.
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
TinyTLSList* tls = &g_tls_lists[i];
tls->head = NULL;
tls->count = 0;
uint32_t base_cap = (uint32_t)tiny_default_cap(i);
uint32_t class_max = (uint32_t)tiny_cap_max_for_class(i);
if (base_cap > class_max) base_cap = class_max;
if ((uint32_t)g_mag_cap_limit < base_cap) base_cap = (uint32_t)g_mag_cap_limit;
// A positive per-class override replaces the default after the same clamping.
if (g_mag_cap_override[i] > 0) {
uint32_t ov = (uint32_t)g_mag_cap_override[i];
if (ov > class_max) ov = class_max;
if (ov > (uint32_t)g_mag_cap_limit) ov = (uint32_t)g_mag_cap_limit;
if (ov != 0u) base_cap = ov;
}
// Never leave a zero cap; fall back to 32.
if (base_cap == 0u) base_cap = 32u;
tls->cap = base_cap;
tls->refill_low = tiny_tls_default_refill(base_cap);
tls->spill_high = tiny_tls_default_spill(base_cap);
tiny_tls_publish_targets(i, base_cap);
}
if (mem_diet_enabled) {
tiny_apply_mem_diet();
}
// Enable signal-triggered stats dump if requested (SIGUSR1)
// (The definition of this hook visible in this file is an empty stub.)
hak_tiny_enable_signal_dump();
// Phase 6.23: SuperSlab support (mimalloc-style fast allocation)
// Allow runtime disable/enable via env (0=off, 1=on)
// Phase 6-2.5 FIX: SuperSlab is independent from diet mode (both are performance-critical)
// - SuperSlab: Fast allocation/free (defaults to 1, set in hakmem_config.c:334)
// - Diet mode: Magazine capacity limits only (doesn't disable subsystems)
// When the variable is unset, g_use_superslab keeps whatever value it already had.
char* superslab_env = getenv("HAKMEM_TINY_USE_SUPERSLAB");
if (superslab_env) {
g_use_superslab = (atoi(superslab_env) != 0) ? 1 : 0;
}
// Initialize Super Front Cache (SFC) with bench-friendly defaults
// Enabled by default; can be disabled via HAKMEM_SFC_ENABLE=0
{
extern void sfc_init(void);
sfc_init();
}
// Note: Diet mode no longer overrides g_use_superslab (removed lines 104-105)
// SuperSlab defaults to 1 unless explicitly disabled via env var
// One-shot hint: publish/adopt requires SuperSlab ON
// NOTE(review): the condition below also holds when HAKMEM_TINY_USE_SUPERSLAB
// is unset, in which case g_use_superslab keeps its prior value (described
// above as defaulting to 1). The "=0" wording of the hint may then be
// misleading. Confirm intent.
{
static int hint_once = 0;
if (!hint_once) {
const char* must_adopt = getenv("HAKMEM_TINY_MUST_ADOPT");
if ((!superslab_env || g_use_superslab == 0) && must_adopt && atoi(must_adopt) != 0) {
fprintf(stderr, "[HINT] HAKMEM_TINY_USE_SUPERSLAB=0: publish/adopt pipeline is disabled. Set =1 for mailbox/adopt.\n");
}
hint_once = 1;
}
}
// TLS list toggle: only overridden when the variable is present.
{
char* tlslist_env = getenv("HAKMEM_TINY_TLS_LIST");
if (tlslist_env) {
g_tls_list_enable = (atoi(tlslist_env) != 0) ? 1 : 0;
}
}
// Phase 9.4: TLS SLL toggle (default ON) + class mask
{
// The env variable can only switch SLL off here.
char* sll_env = getenv("HAKMEM_TINY_TLS_SLL");
if (sll_env && atoi(sll_env) == 0) {
g_tls_sll_enable = 0;
}
// Restrict SLL usage to selected classes
// Precedence: default 0xFF, then C03_ONLY narrows to 0x0F, then an explicit
// MASK in 0..0xFF wins over both.
extern int g_tls_sll_class_mask;
g_tls_sll_class_mask = 0xFF; // default all classes (0..7)
char* c03 = getenv("HAKMEM_TINY_SLL_C03_ONLY");
if (c03 && atoi(c03) != 0) {
g_tls_sll_class_mask = 0x0F; // classes 0..3 only
}
char* msk = getenv("HAKMEM_TINY_SLL_MASK");
if (msk && *msk) {
// Base 0 lets strtol accept decimal, 0x-prefixed hex or 0-prefixed octal.
int v = (int)strtol(msk, NULL, 0);
if (v >= 0 && v <= 0xFF) g_tls_sll_class_mask = v;
}
}
// Path debug enabled?
// Always assigned: an unset or zero variable forces the flag to 0.
{
char* pd = getenv("HAKMEM_TINY_PATH_DEBUG");
g_path_debug_enabled = (pd && atoi(pd) != 0) ? 1 : 0;
}
// Ultra-Bump TLS shadow (per the original note: default ON, can be turned OFF via env)
// Bump chunk is accepted only in the open range 0 < v < 32768.
{
char* ub = getenv("HAKMEM_TINY_BUMP_SHADOW");
if (ub) { g_ultra_bump_shadow = (atoi(ub) != 0) ? 1 : 0; }
char* bc = getenv("HAKMEM_TINY_BUMP_CHUNK");
if (bc) { int v = atoi(bc); if (v > 0 && v < 32768) g_bump_chunk = v; }
}
// Refill-one-on-miss: avoid building a chain; allocate and return just one block
{
char* ro = getenv("HAKMEM_TINY_REFILL_ONE_ON_MISS");
if (ro) g_refill_one_on_miss = (atoi(ro) != 0) ? 1 : 0;
}
// SLL multiplier (hot tiny classes)
// Out-of-range values are clamped into 1..16 rather than ignored.
char* sllmul = getenv("HAKMEM_SLL_MULTIPLIER");
if (sllmul) {
int v = atoi(sllmul);
if (v < 1) {
v = 1;
} else if (v > 16) {
v = 16; // guardrail
}
g_sll_multiplier = v;
}
// Ultra-Simple front - REMOVED (dead code cleanup 2025-11-27)
// Background Bin/Spill/Remote: runtime ENV toggles removed (fixed OFF)
// Initialize heads to keep structures consistent.
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
atomic_store_explicit(&g_bg_bin_head[k], (uintptr_t)0, memory_order_relaxed);
atomic_store_explicit(&g_remote_target_head[k], (uintptr_t)0, memory_order_relaxed);
atomic_store_explicit(&g_remote_target_len[k], 0u, memory_order_relaxed);
}
// Optional prefetch enable
{
char* pf = getenv("HAKMEM_TINY_PREFETCH");
if (pf && atoi(pf) != 0) g_tiny_prefetch = 1;
}
// Refill batch tuning (only strictly positive values are accepted)
char* rmax = getenv("HAKMEM_TINY_REFILL_MAX");
if (rmax) { int v = atoi(rmax); if (v > 0) g_tiny_refill_max = v; }
char* rmaxh = getenv("HAKMEM_TINY_REFILL_MAX_HOT");
if (rmaxh) { int v = atoi(rmaxh); if (v > 0) g_tiny_refill_max_hot = v; }
// Per-class overrides: HAKMEM_TINY_REFILL_MAX_C{0..7}, HAKMEM_TINY_REFILL_MAX_HOT_C{0..7}
for (int k = 0; k < TINY_NUM_CLASSES; k++) {
char key1[64]; snprintf(key1, sizeof(key1), "HAKMEM_TINY_REFILL_MAX_C%d", k);
char* v1 = getenv(key1); if (v1) { int vv = atoi(v1); if (vv > 0) g_refill_max_c[k] = vv; }
char key2[64]; snprintf(key2, sizeof(key2), "HAKMEM_TINY_REFILL_MAX_HOT_C%d", k);
char* v2 = getenv(key2); if (v2) { int vv = atoi(v2); if (vv > 0) g_refill_max_hot_c[k] = vv; }
}
// Stats sampling rate (compile-time gated) via env HAKMEM_TINY_STAT_RATE_LG
#if defined(HAKMEM_ENABLE_STATS) && defined(HAKMEM_TINY_STAT_SAMPLING)
{
char* sr = getenv("HAKMEM_TINY_STAT_RATE_LG");
if (sr) { int lg = atoi(sr); if (lg >= 0 && lg <= 31) g_stat_rate_lg = lg; }
// Select the function pointer (removes the branch from the hot path)
g_stat_alloc_fn = (g_stat_rate_lg == 0) ? hkm_stat_alloc_always : hkm_stat_alloc_sampled;
}
#elif defined(HAKMEM_ENABLE_STATS)
// When sampling is not used, update on every call
// FIXME: g_stat_alloc_fn and hkm_stat_alloc_always not yet implemented
// Stats are recorded via hkm_stat_alloc() in HAK_RET_ALLOC macro instead
// g_stat_alloc_fn = hkm_stat_alloc_always;
#endif
// Spill hysteresis (read here so the free hot path does not call getenv)
// Negative values are clamped to 0.
{
char* sh = getenv("HAKMEM_TINY_SPILL_HYST");
if (sh) { int v = atoi(sh); if (v < 0) v = 0; g_spill_hyst = v; }
}
// Ultra mode and its validation flag: opt-in only.
char* ultra_env = getenv("HAKMEM_TINY_ULTRA");
if (ultra_env && atoi(ultra_env) != 0) {
g_tiny_ultra = 1;
}
char* uval = getenv("HAKMEM_TINY_ULTRA_VALIDATE");
if (uval && atoi(uval) != 0) {
g_ultra_validate = 1;
}
// Ultra env overrides: per-class batch and sll_cap
// HAKMEM_TINY_ULTRA_BATCH_C{0..7}, HAKMEM_TINY_ULTRA_SLL_CAP_C{0..7}
// `var` is a scratch buffer for building per-class variable names; it is
// reused by the fast-cap loop further down.
char var[64];
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
snprintf(var, sizeof(var), "HAKMEM_TINY_ULTRA_BATCH_C%d", i);
char* vb = getenv(var);
if (vb) { int v = atoi(vb); if (v > 0) g_ultra_batch_override[i] = v; }
snprintf(var, sizeof(var), "HAKMEM_TINY_ULTRA_SLL_CAP_C%d", i);
char* vc = getenv(var);
if (vc) { int v = atoi(vc); if (v > 0) g_ultra_sll_cap_override[i] = v; }
// Normal-path per-class overrides
// Accepted only in 1..TINY_TLS_MAG_CAP.
snprintf(var, sizeof(var), "HAKMEM_TINY_MAG_CAP_C%d", i);
char* vm = getenv(var);
if (vm) { int v = atoi(vm); if (v > 0 && v <= TINY_TLS_MAG_CAP) g_mag_cap_override[i] = v; }
// Front refill count per-class override (fast path tuning)
// Clamped into 0..256; note that 0 is a storable value here.
snprintf(var, sizeof(var), "HAKMEM_TINY_REFILL_COUNT_C%d", i);
char* rc = getenv(var);
if (rc) { int v = atoi(rc); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_class[i] = v; }
}
// Front refill count globals
// Phase 10: Set aggressive defaults for hot and mid classes
// Each value is clamped into 0..256 when supplied, otherwise set to its default.
{
char* g = getenv("HAKMEM_TINY_REFILL_COUNT");
if (g) { int v = atoi(g); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_global = v; }
else { g_refill_count_global = 64; } // Phase 10: default 64 (was 16)
char* h = getenv("HAKMEM_TINY_REFILL_COUNT_HOT");
if (h) { int v = atoi(h); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_hot = v; }
else { g_refill_count_hot = 128; } // Phase 10: default 128 for hot classes (C0-C3)
char* m = getenv("HAKMEM_TINY_REFILL_COUNT_MID");
if (m) { int v = atoi(m); if (v < 0) v = 0; if (v > 256) v = 256; g_refill_count_mid = v; }
else { g_refill_count_mid = 96; } // Phase 10: default 96 for mid classes (C4-C7)
}
// Sensible default for class 7 (1024B): favor larger refill to reduce refills/syscalls
// NOTE(review): an explicit HAKMEM_TINY_REFILL_COUNT_C7=0 stores 0 in the loop
// above, so it cannot be told apart from "unset" and is replaced by 128 here.
if (g_refill_count_class[7] == 0) {
g_refill_count_class[7] = 128; // Phase 10: increased from 64 to 128
}
// Fast cache: global enable switch plus per-class capacities.
{
// HAKMEM_TINY_FAST can only switch the fast path off.
char* fast_env = getenv("HAKMEM_TINY_FAST");
if (fast_env && atoi(fast_env) == 0) g_fast_enable = 0;
// fast_global stays -1 unless HAKMEM_TINY_FAST_CAP is in 0..TINY_TLS_MAG_CAP.
int fast_global = -1;
char* fast_cap_env = getenv("HAKMEM_TINY_FAST_CAP");
if (fast_cap_env) {
int v = atoi(fast_cap_env);
if (v >= 0 && v <= TINY_TLS_MAG_CAP) fast_global = v;
}
// Precedence per class: compiled default < global env < per-class env.
// g_fast_cap_locked[i] becomes 1 when the cap came from either env variable
// and 0 otherwise.
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
uint16_t cap = g_fast_cap_defaults[i];
if (fast_global >= 0) cap = (uint16_t)fast_global;
snprintf(var, sizeof(var), "HAKMEM_TINY_FAST_CAP_C%d", i);
char* fc = getenv(var);
if (fc) {
int v = atoi(fc);
if (v < 0) v = 0;
if (v > TINY_TLS_MAG_CAP) v = TINY_TLS_MAG_CAP;
cap = (uint16_t)v;
g_fast_cap_locked[i] = 1;
} else if (fast_global >= 0) {
g_fast_cap_locked[i] = 1;
} else {
g_fast_cap_locked[i] = 0;
}
g_fast_cap[i] = cap;
}
}
// Debug and safety toggles: all opt-in (set only when the variable is non-zero).
{
// DEBUG_FAST0 additionally switches off the fast path, hot magazine and TLS list.
const char* dbg_fast = getenv("HAKMEM_TINY_DEBUG_FAST0");
if (dbg_fast && atoi(dbg_fast) != 0) {
g_debug_fast0 = 1;
g_fast_enable = 0;
g_hotmag_enable = 0;
g_tls_list_enable = 0;
}
const char* dbg_remote = getenv("HAKMEM_TINY_DEBUG_REMOTE_GUARD");
if (dbg_remote && atoi(dbg_remote) != 0) {
g_debug_remote_guard = 1;
}
const char* rf_force = getenv("HAKMEM_TINY_RF_FORCE_NOTIFY");
if (rf_force && atoi(rf_force) != 0) {
extern int g_remote_force_notify;
g_remote_force_notify = 1;
}
const char* safe_free = getenv("HAKMEM_SAFE_FREE");
if (safe_free && atoi(safe_free) != 0) {
extern int g_tiny_safe_free; g_tiny_safe_free = 1;
}
const char* safe_free_strict = getenv("HAKMEM_SAFE_FREE_STRICT");
if (safe_free_strict && atoi(safe_free_strict) != 0) {
extern int g_tiny_safe_free_strict; g_tiny_safe_free_strict = 1;
}
const char* force_remote = getenv("HAKMEM_TINY_FORCE_REMOTE");
if (force_remote && atoi(force_remote) != 0) {
extern int g_tiny_force_remote; g_tiny_force_remote = 1;
}
// Remote side-table (debug only)
tiny_remote_side_init_from_env();
}
// Optional one-line configuration trace to stderr.
// g_super_trace is a function-local static despite its g_ prefix; -1 means
// "not yet read from the environment".
static int g_super_trace = -1;
if (__builtin_expect(g_super_trace == -1, 0)) {
const char* tr = getenv("HAKMEM_TINY_SUPERSLAB_TRACE");
g_super_trace = (tr && atoi(tr) != 0) ? 1 : 0;
}
if (g_super_trace) {
static int logged_once = 0;
if (!logged_once) {
fprintf(stderr, "[SUPERTRACE] mem_diet=%d env=%s g_use_superslab=%d fast_enable=%d cap0=%u cap1=%u cap2=%u cap3=%u cap4=%u reslist=%d\n",
mem_diet_enabled,
superslab_env ? superslab_env : "(null)",
g_use_superslab,
g_fast_enable,
(unsigned)g_fast_cap[0],
(unsigned)g_fast_cap[1],
(unsigned)g_fast_cap[2],
(unsigned)g_fast_cap[3],
(unsigned)g_fast_cap[4],
g_tls_list_enable);
logged_once = 1;
}
}
tiny_ace_init_defaults();
// Fast cache and frontend: opt-in only.
char* fc_env = getenv("HAKMEM_TINY_FASTCACHE");
if (fc_env && atoi(fc_env) != 0) {
g_fastcache_enable = 1;
}
char* fe_env = getenv("HAKMEM_TINY_FRONTEND");
if (fe_env && atoi(fe_env) != 0) {
g_frontend_enable = 1;
}
// TinyQuickSlot opt-in
{
char* q = getenv("HAKMEM_TINY_QUICK");
if (q && atoi(q) != 0) g_quick_enable = 1;
}
// Tiny Front Routing Policy: initialize per-class Tiny vs Pool routing.
// ENV: HAKMEM_TINY_PROFILE = hot / conservative / off / full
// - conservative (default): all classes TINY_FIRST
// - hot: C0-C3=TINY_ONLY, C4-C6=TINY_FIRST, C7=POOL_ONLY
// - off: all classes POOL_ONLY
// - full: all classes TINY_ONLY
tiny_route_init();
// The OBS/INT engine is disabled (experimental). Revive it if needed.
// Step 2: Initialize Slab Registry (only if enabled)
if (g_use_registry) {
memset(g_slab_registry, 0, sizeof(g_slab_registry));
}
// Initialize per-class locks
// The return value of pthread_mutex_init is not checked.
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
pthread_mutex_init(&g_tiny_class_locks[i].m, NULL);
}
// Phase 8.3: Initialize ACE (Adaptive Cache Engine) state
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
g_ss_ace[i].current_lg = 20; // Start with 1MB SuperSlabs
g_ss_ace[i].target_lg = 20; // Default to 1MB
g_ss_ace[i].hot_score = 0;
g_ss_ace[i].alloc_count = 0;
g_ss_ace[i].refill_count = 0;
g_ss_ace[i].spill_count = 0;
g_ss_ace[i].live_blocks = 0;
g_ss_ace[i].last_tick_ns = 0;
}
// Lite P1: Pre-allocate Tier 1 (8-64B) hot classes only
// This avoids initialization overhead for common small allocations
// Classes 0-3: 8B, 16B, 32B, 64B (256KB total, not 512KB)
// Each new slab is pushed onto the head of its class free list; a failed
// allocation (NULL) is skipped silently.
for (int class_idx = 0; class_idx < 4; class_idx++) {
TinySlab* slab = allocate_new_slab(class_idx);
if (slab) {
slab->next = g_tiny_pool.free_slabs[class_idx];
g_tiny_pool.free_slabs[class_idx] = slab;
}
}
// Phase 11: Initialize SuperSlab Registry and LRU Cache
if (g_use_superslab) {
extern void hak_super_registry_init(void);
extern void hak_ss_lru_init(void);
extern void hak_ss_prewarm_init(void);
hak_super_registry_init();
hak_ss_lru_init();
// Phase 11: Prewarm SuperSlabs to eliminate mmap/munmap churn
// ENV: HAKMEM_PREWARM_SUPERSLABS=<count> (e.g., 32, 128)
hak_ss_prewarm_init();
}
// When route tracing is enabled at runtime, record an init marker in the debug
// ring. The payload 0x494E4954 is the ASCII bytes 'I','N','I','T'.
if (__builtin_expect(route_enabled_runtime(), 0)) {
tiny_debug_ring_record(TINY_RING_EVENT_ROUTE, (uint16_t)0xFFFFu, NULL, (uintptr_t)0x494E4954u);
}
}