diff --git a/core/box/front_gate_classifier.c b/core/box/front_gate_classifier.c index 813dfac2..3a3518a8 100644 --- a/core/box/front_gate_classifier.c +++ b/core/box/front_gate_classifier.c @@ -199,16 +199,24 @@ ptr_classification_t classify_ptr(void* ptr) { uint8_t header = *((uint8_t*)ptr - 1); uint8_t magic = header & 0xF0; - // Fast path: Tiny allocation (magic = 0xa0) + // Fast path: Tiny allocation (magic = 0xa0) — guarded by Superslab registry if (magic == HEADER_MAGIC) { // HEADER_MAGIC = 0xa0 int class_idx = header & HEADER_CLASS_MASK; if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) { - result.kind = PTR_KIND_TINY_HEADER; - result.class_idx = class_idx; + SuperSlab* ss = hak_super_lookup(ptr); + if (ss && ss->magic == SUPERSLAB_MAGIC) { + result.kind = PTR_KIND_TINY_HEADER; + result.class_idx = class_idx; + result.ss = ss; #if !HAKMEM_BUILD_RELEASE - g_classify_header_hit++; + g_classify_header_hit++; #endif - return result; + return result; + } else { + // Superslab未登録 → hakmem外。Tiny扱いしない。 + result.kind = PTR_KIND_UNKNOWN; + return result; + } } } diff --git a/core/box/hak_alloc_api.inc.h b/core/box/hak_alloc_api.inc.h index 7e770cc9..29339d08 100644 --- a/core/box/hak_alloc_api.inc.h +++ b/core/box/hak_alloc_api.inc.h @@ -106,6 +106,14 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { hkm_size_hist_record(size); + // Legacy Mid MT allocator (Phase 5) is disabled by default to favor ACE/Pool. + // Enable via HAKMEM_MID_MT_ENABLE=1 when running legacy benchmarks. + static int g_mid_mt_enabled = -1; + if (__builtin_expect(g_mid_mt_enabled < 0, 0)) { + const char* e = getenv("HAKMEM_MID_MT_ENABLE"); + g_mid_mt_enabled = (e && *e && *e != '0') ? 1 : 0; + } + #ifdef HAKMEM_POOL_TLS_PHASE1 // Phase 1: Ultra-fast Pool TLS for 8KB-52KB range if (size >= 8192 && size <= 53248) { @@ -116,7 +124,7 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { } #endif - if (__builtin_expect(mid_is_in_range(size), 0)) { + if (__builtin_expect(g_mid_mt_enabled && mid_is_in_range(size), 0)) { #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_mid); #endif diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h index b53515e8..6fdd549f 100644 --- a/core/box/hak_wrappers.inc.h +++ b/core/box/hak_wrappers.inc.h @@ -30,6 +30,7 @@ void* realloc(void* ptr, size_t size) { #include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback #include "front_gate_classifier.h" // Box FG: pointer classification (header/reg) +#include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid) #include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification #include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination #include "mid_free_route_box.h" // Phase 5-Step2: Mid MT free routing fix @@ -76,7 +77,13 @@ void* malloc(size_t size) { // CRITICAL FIX (BUG #7): Increment lock depth FIRST, before ANY libc calls // This prevents infinite recursion when getenv/fprintf/dlopen call malloc g_hakmem_lock_depth++; - if (size == 33000) write(2, "STEP:1 Lock++\n", 14); + // Debug step trace for 33KB: gated by env HAKMEM_STEP_TRACE (default: OFF) + static int g_step_trace = -1; + if (__builtin_expect(g_step_trace == -1, 0)) { + const char* e = getenv("HAKMEM_STEP_TRACE"); + g_step_trace = (e && *e && *e != '0') ? 1 : 0; + } + if (g_step_trace && size == 33000) write(2, "STEP:1 Lock++\n", 14); // Guard against recursion during initialization if (__builtin_expect(g_initializing != 0, 0)) { @@ -100,11 +107,11 @@ void* malloc(size_t size) { if (size == 33000) write(2, "RET:ForceLibc\n", 14); return __libc_malloc(size); } - if (size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24); + if (g_step_trace && size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24); int ld_mode = hak_ld_env_mode(); if (ld_mode) { - if (size == 33000) write(2, "STEP:3 LD Mode\n", 15); + if (g_step_trace && size == 33000) write(2, "STEP:3 LD Mode\n", 15); if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); @@ -131,7 +138,7 @@ void* malloc(size_t size) { return __libc_malloc(size); } } - if (size == 33000) write(2, "STEP:4 LD Check passed\n", 23); + if (g_step_trace && size == 33000) write(2, "STEP:4 LD Check passed\n", 23); // Phase 26: CRITICAL - Ensure initialization before fast path // (fast path bypasses hak_alloc_at, so we need to init here) @@ -145,9 +152,9 @@ void* malloc(size_t size) { // Phase 4-Step3: Use config macro for compile-time optimization // Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY) if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) { - if (size == 33000) write(2, "STEP:5 Unified Gate check\n", 26); + if (g_step_trace && size == 33000) write(2, "STEP:5 Unified Gate check\n", 26); if (size <= tiny_get_max_size()) { - if (size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24); + if (g_step_trace && size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24); void* ptr = malloc_tiny_fast(size); if (__builtin_expect(ptr != NULL, 1)) { g_hakmem_lock_depth--; @@ -157,7 +164,7 @@ void* malloc(size_t size) { // Unified Cache miss → fallback to normal path (hak_alloc_at) } } - if (size == 33000) write(2, "STEP:6 All checks passed\n", 25); + if (g_step_trace && size == 33000) write(2, "STEP:6 All checks passed\n", 25); #if !HAKMEM_BUILD_RELEASE if (count > 14250 && count < 14280 && size <= 1024) { @@ -238,6 +245,12 @@ void free(void* ptr) { default: break; } } + if (!is_hakmem_owned) { + // Failsafe: Mid registry lookup catches headerless/corrupted Mid allocations + if (hak_pool_mid_lookup(ptr, NULL)) { + is_hakmem_owned = 1; + } + } if (is_hakmem_owned) { // Route to hak_free_at even if lock_depth>0(ログ抑制のためptr_traceのみ使用) @@ -316,14 +329,27 @@ void free(void* ptr) { if (offset_in_page > 0) { // Not page-aligned, safe to check ptr-1 uint8_t header = *((uint8_t*)ptr - 1); - if ((header & 0xF0) == 0xA0 || (header & 0xF0) == 0xB0) { - // HEADER_MAGIC found (0xa0 or 0xb0) → hakmem Tiny allocation + if ((header & 0xF0) == 0xA0) { + // Tiny header byte → require Superslab to avoid誤分類 + SuperSlab* ss = hak_super_lookup(ptr); + if (ss && ss->magic == SUPERSLAB_MAGIC) { + g_hakmem_lock_depth++; + hak_free_at(ptr, 0, HAK_CALLSITE()); + g_hakmem_lock_depth--; + return; + } + // Superslab未登録 → hakmem管理外。libc free にも渡さず無視(ワークセットのゴミ対策)。 + return; + } else if ((header & 0xF0) == 0xB0) { + // Pool TLS header (if enabled) — no registry check needed +#ifdef HAKMEM_POOL_TLS_PHASE1 g_hakmem_lock_depth++; hak_free_at(ptr, 0, HAK_CALLSITE()); g_hakmem_lock_depth--; return; +#endif } - // No header magic → external pointer (BenchMeta, libc allocation, etc.) + // No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.) extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_external_nomag"); __libc_free(ptr); diff --git a/core/box/pool_api.inc.h b/core/box/pool_api.inc.h index b956b397..3dbf8e8d 100644 --- a/core/box/pool_api.inc.h +++ b/core/box/pool_api.inc.h @@ -16,10 +16,17 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) { // Debug for 33-41KB allocations if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size); } - // P1.7 approach: Avoid using pool during ALL wrapper calls (conservative but safe) + // P1.7 guard: allow pool by default even when called from wrappers. + // Only block if explicitly disabled via env or during nested recursion. extern int hak_in_wrapper(void); - if (hak_in_wrapper() && !g_wrap_l2_enabled) { - if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", hak_in_wrapper(), g_wrap_l2_enabled); } + extern __thread int g_hakmem_lock_depth; + int in_wrapper = hak_in_wrapper(); + if (in_wrapper && g_hakmem_lock_depth > 1) { + if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth); } + return NULL; + } + if (in_wrapper && !g_wrap_l2_enabled) { + if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled); } return NULL; } if (!hak_pool_is_poolable(size)) { diff --git a/core/box/pool_init_api.inc.h b/core/box/pool_init_api.inc.h index 047e2de9..950b9b0f 100644 --- a/core/box/pool_init_api.inc.h +++ b/core/box/pool_init_api.inc.h @@ -52,7 +52,9 @@ static void hak_pool_init_impl(void) { const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE"); g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0); const char* e_wrap = getenv("HAKMEM_WRAP_L2"); - g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0; + if (e_wrap) { + g_wrap_l2_enabled = (atoi(e_wrap) != 0); + } const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE"); if (e_minb) { int v = atoi(e_minb); if (v >= 1 && v <= 8) g_pool_min_bundle = v; } const char* e_mix = getenv("HAKMEM_SHARD_MIX"); diff --git a/core/front/malloc_tiny_fast.h b/core/front/malloc_tiny_fast.h index dec5f56a..e956be47 100644 --- a/core/front/malloc_tiny_fast.h +++ b/core/front/malloc_tiny_fast.h @@ -155,6 +155,12 @@ static inline int free_tiny_fast(void* ptr) { // 4. BASE を計算して Unified Cache に push void* base = (void*)((char*)ptr - 1); + // 5. Superslab 登録確認(誤分類防止) + SuperSlab* ss_guard = hak_super_lookup(ptr); + if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) { + return 0; // hakmem 管理外 → 通常 free 経路へ + } + // Cross-thread free detection (Larson MT crash fix, ENV gated) { static __thread int g_larson_fix = -1; diff --git a/core/hakmem_l25_pool.c b/core/hakmem_l25_pool.c index e6543270..50b68b89 100644 --- a/core/hakmem_l25_pool.c +++ b/core/hakmem_l25_pool.c @@ -120,7 +120,7 @@ static struct { atomic_uint remote_count[L25_NUM_CLASSES][L25_NUM_SHARDS]; } g_l25_pool; -static int g_wrap_l25_enabled = 0; // env: HAKMEM_WRAP_L25=1 to allow in wrappers +static int g_wrap_l25_enabled = 1; // env: HAKMEM_WRAP_L25=0 to disable in wrappers static int g_l25_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING static int g_l25_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES static int g_l25_tls_lo_max = 256; // env: HAKMEM_TLS_LO_MAX @@ -722,7 +722,9 @@ void hak_l25_pool_init(void) { char* dz = getenv("HAKMEM_L25_DZ"); g_l25_pool.demand_zero = (dz && atoi(dz) != 0) ? 1 : 0; const char* e_wrap = getenv("HAKMEM_WRAP_L25"); - g_wrap_l25_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0; + if (e_wrap) { + g_wrap_l25_enabled = (atoi(e_wrap) != 0); + } const char* e_ring = getenv("HAKMEM_POOL_TLS_RING"); if (e_ring) g_l25_tls_ring_enabled = (atoi(e_ring) != 0); const char* e_probe = getenv("HAKMEM_TRYLOCK_PROBES"); @@ -812,7 +814,10 @@ void* hak_l25_pool_try_alloc(size_t size, uintptr_t site_id) { if (!g_l25_pool.initialized) hak_l25_pool_init(); // P1.7 approach: Avoid using L2.5 during ALL wrapper calls (conservative but safe) extern int hak_in_wrapper(void); - if (hak_in_wrapper() && !g_wrap_l25_enabled) return NULL; + extern __thread int g_hakmem_lock_depth; + int in_wrapper = hak_in_wrapper(); + if (in_wrapper && g_hakmem_lock_depth > 1) return NULL; + if (in_wrapper && !g_wrap_l25_enabled) return NULL; if (!hak_l25_pool_is_poolable(size)) return NULL; // Get class index (inline綺麗綺麗!) diff --git a/core/hakmem_policy.c b/core/hakmem_policy.c index 7917cd93..b476fac6 100644 --- a/core/hakmem_policy.c +++ b/core/hakmem_policy.c @@ -61,7 +61,7 @@ void hkm_policy_init(void) { // W_MAX = 要求サイズの何倍までのクラスを許容するか // // 現在の値: - // - w_max_mid = 1.40 (40%切り上げ許容) - やや保守的 + // - w_max_mid = 2.00 (100%切り上げ許容) - Mid全域カバー重視 // - w_max_large = 1.30 (30%切り上げ許容) - 保守的 **問題あり** // // 問題点: @@ -69,7 +69,7 @@ void hkm_policy_init(void) { // 例: 35KB要求 → 64KB使用は 1.83倍 > 1.30 → NG → malloc fallback // // 推奨値: - // - w_max_mid = 1.40~1.60 (40-60%許容) + // - w_max_mid = 1.60~2.00 (60-100%許容) — Mid MT を切った場合はこちら // - w_max_large = 1.60 (60%許容) ⭐⭐⭐ 即効改善 // // トレードオフ: @@ -78,7 +78,7 @@ void hkm_policy_init(void) { // ======================================================================== // shard/policy maps default to 0 (noop) - pol->w_max_mid = 1.60f; // Phase 6.25: Looser for MidPool performance (was 1.40) + pol->w_max_mid = 2.00f; // Phase 7: Mid MT off → W_MAX緩和でMidクラス全域をカバー pol->w_max_large = 1.30f; // Phase 6.21: Revert to 1.30 (Bridge classes now cover 32-64KB gap) pol->w_max = 1.6f; // legacy aggregate (unused by ACE) pol->thp_threshold = 2 * 1024 * 1024; // 2MiB diff --git a/core/hakmem_pool.c b/core/hakmem_pool.c index 417dd8cd..ac15e829 100644 --- a/core/hakmem_pool.c +++ b/core/hakmem_pool.c @@ -796,7 +796,7 @@ static struct { atomic_uint_fast64_t ring_underflow __attribute__((aligned(64))); } g_pool; -static int g_wrap_l2_enabled = 0; // env: HAKMEM_WRAP_L2=1 to allow in wrappers +static int g_wrap_l2_enabled = 1; // env: HAKMEM_WRAP_L2=0 to disable in wrappers static int g_shard_mix_enabled = 0; // env: HAKMEM_SHARD_MIX=1 to enable stronger hashing static int g_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING=1 to enable TLS ring static int g_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES (1..8) diff --git a/core/hakmem_tiny_tls_list.h b/core/hakmem_tiny_tls_list.h index 400bb2ad..039e2c26 100644 --- a/core/hakmem_tiny_tls_list.h +++ b/core/hakmem_tiny_tls_list.h @@ -63,11 +63,20 @@ static inline void* tls_list_pop(TinyTLSList* tls, int class_idx) { return NULL; } // Fail-fast: reject obviously invalid head before dereference - size_t blk = g_tiny_class_sizes[class_idx]; - if (__builtin_expect(blk == 0 || ((uintptr_t)head % blk) != 0, 0)) { - fprintf(stderr, "[TLS_LIST_POISON] cls=%d head=%p count=%u (misaligned or size=0)\n", - class_idx, head, tls->count); - tiny_failfast_abort_ptr("tls_list_pop", NULL, -1, head, "invalid_head"); + uintptr_t haddr = (uintptr_t)head; + size_t blk = (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) ? g_tiny_class_sizes[class_idx] : 0; + int bad_range = (haddr < 4096) || (haddr > 0x00007fffffffffffULL) || (haddr & 0x7u); + int bad_align = (blk == 0) || (haddr % blk != 0); + if (__builtin_expect(bad_range || bad_align, 0)) { + static __thread uint8_t s_log_limit_pop = 0; + if (s_log_limit_pop < 4) { + fprintf(stderr, "[TLS_LIST_POISON] cls=%d head=%p count=%u range_bad=%d align_bad=%d blk=%zu\n", + class_idx, head, tls->count, bad_range, bad_align, blk); + s_log_limit_pop++; + } + if (__builtin_expect(tiny_refill_failfast_level() >= 1, 0)) { + tiny_failfast_abort_ptr("tls_list_pop", NULL, -1, head, "invalid_head"); + } tls->head = NULL; tls->count = 0; return NULL; @@ -129,6 +138,13 @@ static inline void tls_list_push(TinyTLSList* tls, void* node, int class_idx) { // - caller handles spill/thresholds separately static inline void* tls_list_pop_fast(TinyTLSList* tls, int class_idx) { void* head = tls->head; if (!head) return NULL; + uintptr_t haddr = (uintptr_t)head; + size_t blk = (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) ? g_tiny_class_sizes[class_idx] : 0; + if (__builtin_expect(haddr < 4096 || haddr > 0x00007fffffffffffULL || (haddr & 0x7u) || blk == 0 || (haddr % blk != 0), 0)) { + tls->head = NULL; + tls->count = 0; + return NULL; + } tls->head = tiny_next_read(class_idx, head); if (tls->count > 0) tls->count--; return head; diff --git a/hakmem.d b/hakmem.d index 01846fb0..3ab2015b 100644 --- a/hakmem.d +++ b/hakmem.d @@ -24,12 +24,12 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/box/hak_core_init.inc.h core/hakmem_phase7_config.h \ core/box/ss_hot_prewarm_box.h core/box/hak_alloc_api.inc.h \ core/box/../hakmem_tiny.h core/box/../hakmem_smallmid.h \ - core/box/../pool_tls.h core/box/mid_large_config_box.h \ - core/box/../hakmem_config.h core/box/../hakmem_features.h \ - core/box/hak_free_api.inc.h core/hakmem_tiny_superslab.h \ - core/box/../tiny_free_fast_v2.inc.h core/box/../tiny_region_id.h \ - core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_config.h \ - core/box/../box/tls_sll_box.h core/box/../box/../hakmem_internal.h \ + core/box/mid_large_config_box.h core/box/../hakmem_config.h \ + core/box/../hakmem_features.h core/box/hak_free_api.inc.h \ + core/hakmem_tiny_superslab.h core/box/../tiny_free_fast_v2.inc.h \ + core/box/../tiny_region_id.h core/box/../hakmem_build_flags.h \ + core/box/../hakmem_tiny_config.h core/box/../box/tls_sll_box.h \ + core/box/../box/../hakmem_internal.h \ core/box/../box/../hakmem_tiny_config.h \ core/box/../box/../hakmem_build_flags.h \ core/box/../box/../hakmem_debug_master.h \ @@ -55,7 +55,8 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \ core/hakmem_tiny_integrity.h core/box/front_gate_v2.h \ core/box/external_guard_box.h core/box/ss_slab_meta_box.h \ core/box/fg_tiny_gate_box.h core/box/hak_wrappers.inc.h \ - core/box/front_gate_classifier.h core/box/../front/malloc_tiny_fast.h \ + core/box/front_gate_classifier.h core/box/../hakmem_pool.h \ + core/box/../front/malloc_tiny_fast.h \ core/box/../front/../hakmem_build_flags.h \ core/box/../front/../hakmem_tiny_config.h \ core/box/../front/../superslab/superslab_inline.h \ @@ -133,7 +134,6 @@ core/box/ss_hot_prewarm_box.h: core/box/hak_alloc_api.inc.h: core/box/../hakmem_tiny.h: core/box/../hakmem_smallmid.h: -core/box/../pool_tls.h: core/box/mid_large_config_box.h: core/box/../hakmem_config.h: core/box/../hakmem_features.h: @@ -182,6 +182,7 @@ core/box/ss_slab_meta_box.h: core/box/fg_tiny_gate_box.h: core/box/hak_wrappers.inc.h: core/box/front_gate_classifier.h: +core/box/../hakmem_pool.h: core/box/../front/malloc_tiny_fast.h: core/box/../front/../hakmem_build_flags.h: core/box/../front/../hakmem_tiny_config.h: