Fix mid free routing and relax mid W_MAX

This commit is contained in:
Moe Charm (CI)
2025-12-01 22:06:10 +09:00
parent 4ef0171bc0
commit 195c74756c
11 changed files with 119 additions and 40 deletions

View File

@ -199,16 +199,24 @@ ptr_classification_t classify_ptr(void* ptr) {
uint8_t header = *((uint8_t*)ptr - 1);
uint8_t magic = header & 0xF0;
// Fast path: Tiny allocation (magic = 0xa0)
// Fast path: Tiny allocation (magic = 0xa0) — guarded by Superslab registry
if (magic == HEADER_MAGIC) { // HEADER_MAGIC = 0xa0
int class_idx = header & HEADER_CLASS_MASK;
if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
result.kind = PTR_KIND_TINY_HEADER;
result.class_idx = class_idx;
result.ss = ss;
#if !HAKMEM_BUILD_RELEASE
g_classify_header_hit++;
#endif
return result;
} else {
// Superslab未登録 → hakmem外。Tiny扱いしない。
result.kind = PTR_KIND_UNKNOWN;
return result;
}
}
}

View File

@ -106,6 +106,14 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
hkm_size_hist_record(size);
// Legacy Mid MT allocator (Phase 5) is disabled by default to favor ACE/Pool.
// Enable via HAKMEM_MID_MT_ENABLE=1 when running legacy benchmarks.
static int g_mid_mt_enabled = -1;
if (__builtin_expect(g_mid_mt_enabled < 0, 0)) {
const char* e = getenv("HAKMEM_MID_MT_ENABLE");
g_mid_mt_enabled = (e && *e && *e != '0') ? 1 : 0;
}
#ifdef HAKMEM_POOL_TLS_PHASE1
// Phase 1: Ultra-fast Pool TLS for 8KB-52KB range
if (size >= 8192 && size <= 53248) {
@ -116,7 +124,7 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
}
#endif
if (__builtin_expect(mid_is_in_range(size), 0)) {
if (__builtin_expect(g_mid_mt_enabled && mid_is_in_range(size), 0)) {
#if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_mid);
#endif

View File

@ -30,6 +30,7 @@ void* realloc(void* ptr, size_t size) {
#include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
#include "front_gate_classifier.h" // Box FG: pointer classification (header/reg)
#include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid)
#include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification
#include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination
#include "mid_free_route_box.h" // Phase 5-Step2: Mid MT free routing fix
@ -76,7 +77,13 @@ void* malloc(size_t size) {
// CRITICAL FIX (BUG #7): Increment lock depth FIRST, before ANY libc calls
// This prevents infinite recursion when getenv/fprintf/dlopen call malloc
g_hakmem_lock_depth++;
if (size == 33000) write(2, "STEP:1 Lock++\n", 14);
// Debug step trace for 33KB: gated by env HAKMEM_STEP_TRACE (default: OFF)
static int g_step_trace = -1;
if (__builtin_expect(g_step_trace == -1, 0)) {
const char* e = getenv("HAKMEM_STEP_TRACE");
g_step_trace = (e && *e && *e != '0') ? 1 : 0;
}
if (g_step_trace && size == 33000) write(2, "STEP:1 Lock++\n", 14);
// Guard against recursion during initialization
if (__builtin_expect(g_initializing != 0, 0)) {
@ -100,11 +107,11 @@ void* malloc(size_t size) {
if (size == 33000) write(2, "RET:ForceLibc\n", 14);
return __libc_malloc(size);
}
if (size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24);
if (g_step_trace && size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24);
int ld_mode = hak_ld_env_mode();
if (ld_mode) {
if (size == 33000) write(2, "STEP:3 LD Mode\n", 15);
if (g_step_trace && size == 33000) write(2, "STEP:3 LD Mode\n", 15);
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) {
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
@ -131,7 +138,7 @@ void* malloc(size_t size) {
return __libc_malloc(size);
}
}
if (size == 33000) write(2, "STEP:4 LD Check passed\n", 23);
if (g_step_trace && size == 33000) write(2, "STEP:4 LD Check passed\n", 23);
// Phase 26: CRITICAL - Ensure initialization before fast path
// (fast path bypasses hak_alloc_at, so we need to init here)
@ -145,9 +152,9 @@ void* malloc(size_t size) {
// Phase 4-Step3: Use config macro for compile-time optimization
// Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
if (size == 33000) write(2, "STEP:5 Unified Gate check\n", 26);
if (g_step_trace && size == 33000) write(2, "STEP:5 Unified Gate check\n", 26);
if (size <= tiny_get_max_size()) {
if (size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24);
if (g_step_trace && size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24);
void* ptr = malloc_tiny_fast(size);
if (__builtin_expect(ptr != NULL, 1)) {
g_hakmem_lock_depth--;
@ -157,7 +164,7 @@ void* malloc(size_t size) {
// Unified Cache miss → fallback to normal path (hak_alloc_at)
}
}
if (size == 33000) write(2, "STEP:6 All checks passed\n", 25);
if (g_step_trace && size == 33000) write(2, "STEP:6 All checks passed\n", 25);
#if !HAKMEM_BUILD_RELEASE
if (count > 14250 && count < 14280 && size <= 1024) {
@ -238,6 +245,12 @@ void free(void* ptr) {
default: break;
}
}
if (!is_hakmem_owned) {
// Failsafe: Mid registry lookup catches headerless/corrupted Mid allocations
if (hak_pool_mid_lookup(ptr, NULL)) {
is_hakmem_owned = 1;
}
}
if (is_hakmem_owned) {
// Route to hak_free_at even if lock_depth>0 (ログ抑制のため ptr_trace のみ使用)
@ -316,14 +329,27 @@ void free(void* ptr) {
if (offset_in_page > 0) {
// Not page-aligned, safe to check ptr-1
uint8_t header = *((uint8_t*)ptr - 1);
if ((header & 0xF0) == 0xA0 || (header & 0xF0) == 0xB0) {
// HEADER_MAGIC found (0xa0 or 0xb0) → hakmem Tiny allocation
if ((header & 0xF0) == 0xA0) {
// Tiny header byte → require Superslab to avoid誤分類
SuperSlab* ss = hak_super_lookup(ptr);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
}
// No header magic → external pointer (BenchMeta, libc allocation, etc.)
// Superslab未登録 → hakmem管理外。libc free にも渡さず無視(ワークセットのゴミ対策)。
return;
} else if ((header & 0xF0) == 0xB0) {
// Pool TLS header (if enabled) — no registry check needed
#ifdef HAKMEM_POOL_TLS_PHASE1
g_hakmem_lock_depth++;
hak_free_at(ptr, 0, HAK_CALLSITE());
g_hakmem_lock_depth--;
return;
#endif
}
// No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.)
extern void __libc_free(void*);
ptr_trace_dump_now("wrap_libc_external_nomag");
__libc_free(ptr);

View File

@ -16,10 +16,17 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
// Debug for 33-41KB allocations
if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size); }
// P1.7 approach: Avoid using pool during ALL wrapper calls (conservative but safe)
// P1.7 guard: allow pool by default even when called from wrappers.
// Only block if explicitly disabled via env or during nested recursion.
extern int hak_in_wrapper(void);
if (hak_in_wrapper() && !g_wrap_l2_enabled) {
if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", hak_in_wrapper(), g_wrap_l2_enabled); }
extern __thread int g_hakmem_lock_depth;
int in_wrapper = hak_in_wrapper();
if (in_wrapper && g_hakmem_lock_depth > 1) {
if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth); }
return NULL;
}
if (in_wrapper && !g_wrap_l2_enabled) {
if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled); }
return NULL;
}
if (!hak_pool_is_poolable(size)) {

View File

@ -52,7 +52,9 @@ static void hak_pool_init_impl(void) {
const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE");
g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0);
const char* e_wrap = getenv("HAKMEM_WRAP_L2");
g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
if (e_wrap) {
g_wrap_l2_enabled = (atoi(e_wrap) != 0);
}
const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE");
if (e_minb) { int v = atoi(e_minb); if (v >= 1 && v <= 8) g_pool_min_bundle = v; }
const char* e_mix = getenv("HAKMEM_SHARD_MIX");

View File

@ -155,6 +155,12 @@ static inline int free_tiny_fast(void* ptr) {
// 4. BASE を計算して Unified Cache に push
void* base = (void*)((char*)ptr - 1);
// 5. Superslab 登録確認(誤分類防止)
SuperSlab* ss_guard = hak_super_lookup(ptr);
if (__builtin_expect(!(ss_guard && ss_guard->magic == SUPERSLAB_MAGIC), 0)) {
return 0; // hakmem 管理外 → 通常 free 経路へ
}
// Cross-thread free detection (Larson MT crash fix, ENV gated)
{
static __thread int g_larson_fix = -1;

View File

@ -120,7 +120,7 @@ static struct {
atomic_uint remote_count[L25_NUM_CLASSES][L25_NUM_SHARDS];
} g_l25_pool;
static int g_wrap_l25_enabled = 0; // env: HAKMEM_WRAP_L25=1 to allow in wrappers
static int g_wrap_l25_enabled = 1; // env: HAKMEM_WRAP_L25=0 to disable in wrappers
static int g_l25_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING
static int g_l25_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES
static int g_l25_tls_lo_max = 256; // env: HAKMEM_TLS_LO_MAX
@ -722,7 +722,9 @@ void hak_l25_pool_init(void) {
char* dz = getenv("HAKMEM_L25_DZ");
g_l25_pool.demand_zero = (dz && atoi(dz) != 0) ? 1 : 0;
const char* e_wrap = getenv("HAKMEM_WRAP_L25");
g_wrap_l25_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
if (e_wrap) {
g_wrap_l25_enabled = (atoi(e_wrap) != 0);
}
const char* e_ring = getenv("HAKMEM_POOL_TLS_RING");
if (e_ring) g_l25_tls_ring_enabled = (atoi(e_ring) != 0);
const char* e_probe = getenv("HAKMEM_TRYLOCK_PROBES");
@ -812,7 +814,10 @@ void* hak_l25_pool_try_alloc(size_t size, uintptr_t site_id) {
if (!g_l25_pool.initialized) hak_l25_pool_init();
// P1.7 guard: L2.5 is allowed in wrappers by default; block only on nested wrapper recursion or when HAKMEM_WRAP_L25=0.
extern int hak_in_wrapper(void);
if (hak_in_wrapper() && !g_wrap_l25_enabled) return NULL;
extern __thread int g_hakmem_lock_depth;
int in_wrapper = hak_in_wrapper();
if (in_wrapper && g_hakmem_lock_depth > 1) return NULL;
if (in_wrapper && !g_wrap_l25_enabled) return NULL;
if (!hak_l25_pool_is_poolable(size)) return NULL;
// Get class index (inline綺麗綺麗!)

View File

@ -61,7 +61,7 @@ void hkm_policy_init(void) {
// W_MAX = 要求サイズの何倍までのクラスを許容するか
//
// 現在の値:
// - w_max_mid = 1.40 (40%切り上げ許容) - やや保守的
// - w_max_mid = 2.00 (100%切り上げ許容) - Mid全域カバー重視
// - w_max_large = 1.30 (30%切り上げ許容) - 保守的 **問題あり**
//
// 問題点:
@ -69,7 +69,7 @@ void hkm_policy_init(void) {
// 例: 35KB要求 → 64KB使用は 1.83倍 > 1.30 → NG → malloc fallback
//
// 推奨値:
// - w_max_mid = 1.40-1.60 (40-60%許容)
// - w_max_mid = 1.60-2.00 (60-100%許容) — Mid MT を切った場合はこちら
// - w_max_large = 1.60 (60%許容) ⭐⭐⭐ 即効改善
//
// トレードオフ:
@ -78,7 +78,7 @@ void hkm_policy_init(void) {
// ========================================================================
// shard/policy maps default to 0 (noop)
pol->w_max_mid = 1.60f; // Phase 6.25: Looser for MidPool performance (was 1.40)
pol->w_max_mid = 2.00f; // Phase 7: Mid MT off → W_MAX緩和でMidクラス全域をカバー
pol->w_max_large = 1.30f; // Phase 6.21: Revert to 1.30 (Bridge classes now cover 32-64KB gap)
pol->w_max = 1.6f; // legacy aggregate (unused by ACE)
pol->thp_threshold = 2 * 1024 * 1024; // 2MiB

View File

@ -796,7 +796,7 @@ static struct {
atomic_uint_fast64_t ring_underflow __attribute__((aligned(64)));
} g_pool;
static int g_wrap_l2_enabled = 0; // env: HAKMEM_WRAP_L2=1 to allow in wrappers
static int g_wrap_l2_enabled = 1; // env: HAKMEM_WRAP_L2=0 to disable in wrappers
static int g_shard_mix_enabled = 0; // env: HAKMEM_SHARD_MIX=1 to enable stronger hashing
static int g_tls_ring_enabled = 1; // env: HAKMEM_POOL_TLS_RING=1 to enable TLS ring
static int g_trylock_probes = 3; // env: HAKMEM_TRYLOCK_PROBES (1..8)

View File

@ -63,11 +63,20 @@ static inline void* tls_list_pop(TinyTLSList* tls, int class_idx) {
return NULL;
}
// Fail-fast: reject obviously invalid head before dereference
size_t blk = g_tiny_class_sizes[class_idx];
if (__builtin_expect(blk == 0 || ((uintptr_t)head % blk) != 0, 0)) {
fprintf(stderr, "[TLS_LIST_POISON] cls=%d head=%p count=%u (misaligned or size=0)\n",
class_idx, head, tls->count);
uintptr_t haddr = (uintptr_t)head;
size_t blk = (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) ? g_tiny_class_sizes[class_idx] : 0;
int bad_range = (haddr < 4096) || (haddr > 0x00007fffffffffffULL) || (haddr & 0x7u);
int bad_align = (blk == 0) || (haddr % blk != 0);
if (__builtin_expect(bad_range || bad_align, 0)) {
static __thread uint8_t s_log_limit_pop = 0;
if (s_log_limit_pop < 4) {
fprintf(stderr, "[TLS_LIST_POISON] cls=%d head=%p count=%u range_bad=%d align_bad=%d blk=%zu\n",
class_idx, head, tls->count, bad_range, bad_align, blk);
s_log_limit_pop++;
}
if (__builtin_expect(tiny_refill_failfast_level() >= 1, 0)) {
tiny_failfast_abort_ptr("tls_list_pop", NULL, -1, head, "invalid_head");
}
tls->head = NULL;
tls->count = 0;
return NULL;
@ -129,6 +138,13 @@ static inline void tls_list_push(TinyTLSList* tls, void* node, int class_idx) {
// - caller handles spill/thresholds separately
static inline void* tls_list_pop_fast(TinyTLSList* tls, int class_idx) {
void* head = tls->head; if (!head) return NULL;
uintptr_t haddr = (uintptr_t)head;
size_t blk = (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) ? g_tiny_class_sizes[class_idx] : 0;
if (__builtin_expect(haddr < 4096 || haddr > 0x00007fffffffffffULL || (haddr & 0x7u) || blk == 0 || (haddr % blk != 0), 0)) {
tls->head = NULL;
tls->count = 0;
return NULL;
}
tls->head = tiny_next_read(class_idx, head);
if (tls->count > 0) tls->count--;
return head;

View File

@ -24,12 +24,12 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
core/box/hak_core_init.inc.h core/hakmem_phase7_config.h \
core/box/ss_hot_prewarm_box.h core/box/hak_alloc_api.inc.h \
core/box/../hakmem_tiny.h core/box/../hakmem_smallmid.h \
core/box/../pool_tls.h core/box/mid_large_config_box.h \
core/box/../hakmem_config.h core/box/../hakmem_features.h \
core/box/hak_free_api.inc.h core/hakmem_tiny_superslab.h \
core/box/../tiny_free_fast_v2.inc.h core/box/../tiny_region_id.h \
core/box/../hakmem_build_flags.h core/box/../hakmem_tiny_config.h \
core/box/../box/tls_sll_box.h core/box/../box/../hakmem_internal.h \
core/box/mid_large_config_box.h core/box/../hakmem_config.h \
core/box/../hakmem_features.h core/box/hak_free_api.inc.h \
core/hakmem_tiny_superslab.h core/box/../tiny_free_fast_v2.inc.h \
core/box/../tiny_region_id.h core/box/../hakmem_build_flags.h \
core/box/../hakmem_tiny_config.h core/box/../box/tls_sll_box.h \
core/box/../box/../hakmem_internal.h \
core/box/../box/../hakmem_tiny_config.h \
core/box/../box/../hakmem_build_flags.h \
core/box/../box/../hakmem_debug_master.h \
@ -55,7 +55,8 @@ hakmem.o: core/hakmem.c core/hakmem.h core/hakmem_build_flags.h \
core/hakmem_tiny_integrity.h core/box/front_gate_v2.h \
core/box/external_guard_box.h core/box/ss_slab_meta_box.h \
core/box/fg_tiny_gate_box.h core/box/hak_wrappers.inc.h \
core/box/front_gate_classifier.h core/box/../front/malloc_tiny_fast.h \
core/box/front_gate_classifier.h core/box/../hakmem_pool.h \
core/box/../front/malloc_tiny_fast.h \
core/box/../front/../hakmem_build_flags.h \
core/box/../front/../hakmem_tiny_config.h \
core/box/../front/../superslab/superslab_inline.h \
@ -133,7 +134,6 @@ core/box/ss_hot_prewarm_box.h:
core/box/hak_alloc_api.inc.h:
core/box/../hakmem_tiny.h:
core/box/../hakmem_smallmid.h:
core/box/../pool_tls.h:
core/box/mid_large_config_box.h:
core/box/../hakmem_config.h:
core/box/../hakmem_features.h:
@ -182,6 +182,7 @@ core/box/ss_slab_meta_box.h:
core/box/fg_tiny_gate_box.h:
core/box/hak_wrappers.inc.h:
core/box/front_gate_classifier.h:
core/box/../hakmem_pool.h:
core/box/../front/malloc_tiny_fast.h:
core/box/../front/../hakmem_build_flags.h:
core/box/../front/../hakmem_tiny_config.h: