Fix mid free routing and relax mid W_MAX

commit 195c74756c
parent 4ef0171bc0
Author: Moe Charm (CI)
Date:   2025-12-01 22:06:10 +09:00

11 changed files with 119 additions and 40 deletions

View File

@@ -199,16 +199,24 @@ ptr_classification_t classify_ptr(void* ptr) {
     uint8_t header = *((uint8_t*)ptr - 1);
     uint8_t magic = header & 0xF0;
-    // Fast path: Tiny allocation (magic = 0xa0)
+    // Fast path: Tiny allocation (magic = 0xa0) — guarded by Superslab registry
     if (magic == HEADER_MAGIC) { // HEADER_MAGIC = 0xa0
         int class_idx = header & HEADER_CLASS_MASK;
         if (class_idx >= 0 && class_idx < TINY_NUM_CLASSES) {
-            result.kind = PTR_KIND_TINY_HEADER;
-            result.class_idx = class_idx;
+            SuperSlab* ss = hak_super_lookup(ptr);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
+                result.kind = PTR_KIND_TINY_HEADER;
+                result.class_idx = class_idx;
+                result.ss = ss;
 #if !HAKMEM_BUILD_RELEASE
-            g_classify_header_hit++;
+                g_classify_header_hit++;
 #endif
-            return result;
+                return result;
+            } else {
+                // Not registered with a Superslab → outside hakmem; do not treat as Tiny.
+                result.kind = PTR_KIND_UNKNOWN;
+                return result;
+            }
         }
     }
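
The point of the guard above: a single 0xa0 byte can occur by chance immediately before a foreign allocation, so the header byte alone no longer proves ownership; only the Superslab registry does. Condensed as a standalone sketch (the helper name is hypothetical; it assumes the declarations from the hunk are in scope):

    // Sketch: the 1-byte magic is a cheap probe, the registry lookup is the
    // authoritative ownership test.
    static inline int ptr_is_hakmem_tiny(void* ptr) {
        uint8_t header = *((uint8_t*)ptr - 1);
        if ((header & 0xF0) != HEADER_MAGIC) return 0;      // not a Tiny candidate
        SuperSlab* ss = hak_super_lookup(ptr);              // registry lookup
        return ss != NULL && ss->magic == SUPERSLAB_MAGIC;  // registered → owned
    }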

View File

@@ -106,6 +106,14 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
     hkm_size_hist_record(size);
+    // Legacy Mid MT allocator (Phase 5) is disabled by default to favor ACE/Pool.
+    // Enable via HAKMEM_MID_MT_ENABLE=1 when running legacy benchmarks.
+    static int g_mid_mt_enabled = -1;
+    if (__builtin_expect(g_mid_mt_enabled < 0, 0)) {
+        const char* e = getenv("HAKMEM_MID_MT_ENABLE");
+        g_mid_mt_enabled = (e && *e && *e != '0') ? 1 : 0;
+    }
 #ifdef HAKMEM_POOL_TLS_PHASE1
     // Phase 1: Ultra-fast Pool TLS for 8KB-52KB range
     if (size >= 8192 && size <= 53248) {
@@ -116,7 +124,7 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
     }
 #endif
-    if (__builtin_expect(mid_is_in_range(size), 0)) {
+    if (__builtin_expect(g_mid_mt_enabled && mid_is_in_range(size), 0)) {
 #if HAKMEM_DEBUG_TIMING
         HKM_TIME_START(t_mid);
 #endif
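
Both this gate and the HAKMEM_STEP_TRACE gate in the next file use the same lazily-cached env-flag idiom: -1 marks "not read yet", so getenv() runs at most once, on the first call that reaches the path. A self-contained sketch of the pattern (placeholder function name):

    #include <stdlib.h>

    // Boolean env flag, read once and cached. Unset, empty, or a value
    // starting with '0' all count as disabled.
    static int mid_mt_enabled(void) {
        static int cached = -1;                      // -1 = not read yet
        if (__builtin_expect(cached < 0, 0)) {
            const char* e = getenv("HAKMEM_MID_MT_ENABLE");
            cached = (e && *e && *e != '0') ? 1 : 0;
        }
        return cached;
    }

The unsynchronized static is a benign race: concurrent first callers compute the same value. Per the comment, legacy benchmarks opt back in with HAKMEM_MID_MT_ENABLE=1.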

View File

@@ -30,6 +30,7 @@ void* realloc(void* ptr, size_t size) {
 #include "../ptr_trace.h" // Debug: pointer trace immediate dump on libc fallback
 #include "front_gate_classifier.h" // Box FG: pointer classification (header/reg)
 #include "../hakmem_pool.h" // Mid registry lookup (failsafe for headerless Mid)
 #include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification
 #include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination
+#include "mid_free_route_box.h" // Phase 5-Step2: Mid MT free routing fix
@@ -76,7 +77,13 @@ void* malloc(size_t size) {
     // CRITICAL FIX (BUG #7): Increment lock depth FIRST, before ANY libc calls
     // This prevents infinite recursion when getenv/fprintf/dlopen call malloc
     g_hakmem_lock_depth++;
-    if (size == 33000) write(2, "STEP:1 Lock++\n", 14);
+    // Debug step trace for 33KB: gated by env HAKMEM_STEP_TRACE (default: OFF)
+    static int g_step_trace = -1;
+    if (__builtin_expect(g_step_trace == -1, 0)) {
+        const char* e = getenv("HAKMEM_STEP_TRACE");
+        g_step_trace = (e && *e && *e != '0') ? 1 : 0;
+    }
+    if (g_step_trace && size == 33000) write(2, "STEP:1 Lock++\n", 14);
     // Guard against recursion during initialization
     if (__builtin_expect(g_initializing != 0, 0)) {
@@ -100,11 +107,11 @@ void* malloc(size_t size) {
         if (size == 33000) write(2, "RET:ForceLibc\n", 14);
         return __libc_malloc(size);
     }
-    if (size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24);
+    if (g_step_trace && size == 33000) write(2, "STEP:2 ForceLibc passed\n", 24);
     int ld_mode = hak_ld_env_mode();
     if (ld_mode) {
-        if (size == 33000) write(2, "STEP:3 LD Mode\n", 15);
+        if (g_step_trace && size == 33000) write(2, "STEP:3 LD Mode\n", 15);
         if (hak_ld_block_jemalloc() && g_jemalloc_loaded) {
             g_hakmem_lock_depth--;
             extern void* __libc_malloc(size_t);
@@ -131,7 +138,7 @@ void* malloc(size_t size) {
             return __libc_malloc(size);
         }
     }
-    if (size == 33000) write(2, "STEP:4 LD Check passed\n", 23);
+    if (g_step_trace && size == 33000) write(2, "STEP:4 LD Check passed\n", 23);
     // Phase 26: CRITICAL - Ensure initialization before fast path
     // (fast path bypasses hak_alloc_at, so we need to init here)
@@ -145,9 +152,9 @@ void* malloc(size_t size) {
     // Phase 4-Step3: Use config macro for compile-time optimization
     // Phase 7-Step1: Changed expect hint from 0→1 (unified path is now LIKELY)
     if (__builtin_expect(TINY_FRONT_UNIFIED_GATE_ENABLED, 1)) {
-        if (size == 33000) write(2, "STEP:5 Unified Gate check\n", 26);
+        if (g_step_trace && size == 33000) write(2, "STEP:5 Unified Gate check\n", 26);
         if (size <= tiny_get_max_size()) {
-            if (size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24);
+            if (g_step_trace && size == 33000) write(2, "STEP:5.1 Inside Unified\n", 24);
             void* ptr = malloc_tiny_fast(size);
             if (__builtin_expect(ptr != NULL, 1)) {
                 g_hakmem_lock_depth--;
@@ -157,7 +164,7 @@ void* malloc(size_t size) {
             // Unified Cache miss → fallback to normal path (hak_alloc_at)
         }
     }
-    if (size == 33000) write(2, "STEP:6 All checks passed\n", 25);
+    if (g_step_trace && size == 33000) write(2, "STEP:6 All checks passed\n", 25);
 #if !HAKMEM_BUILD_RELEASE
     if (count > 14250 && count < 14280 && size <= 1024) {
@@ -238,6 +245,12 @@ void free(void* ptr) {
             default: break;
         }
     }
+    if (!is_hakmem_owned) {
+        // Failsafe: Mid registry lookup catches headerless/corrupted Mid allocations
+        if (hak_pool_mid_lookup(ptr, NULL)) {
+            is_hakmem_owned = 1;
+        }
+    }
     if (is_hakmem_owned) {
         // Route to hak_free_at even if lock_depth>0 (only ptr_trace is used, to suppress logging)
@@ -316,14 +329,27 @@ void free(void* ptr) {
     if (offset_in_page > 0) {
         // Not page-aligned, safe to check ptr-1
         uint8_t header = *((uint8_t*)ptr - 1);
-        if ((header & 0xF0) == 0xA0 || (header & 0xF0) == 0xB0) {
-            // HEADER_MAGIC found (0xa0 or 0xb0) → hakmem Tiny allocation
+        if ((header & 0xF0) == 0xA0) {
+            // Tiny header byte → require Superslab to avoid misclassification
+            SuperSlab* ss = hak_super_lookup(ptr);
+            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                 g_hakmem_lock_depth++;
                 hak_free_at(ptr, 0, HAK_CALLSITE());
                 g_hakmem_lock_depth--;
                 return;
+            }
+            // Not registered with a Superslab → not managed by hakmem; do not pass to libc free either, just ignore (guards against work-set garbage).
+            return;
+        } else if ((header & 0xF0) == 0xB0) {
+            // Pool TLS header (if enabled) — no registry check needed
+#ifdef HAKMEM_POOL_TLS_PHASE1
+            g_hakmem_lock_depth++;
+            hak_free_at(ptr, 0, HAK_CALLSITE());
+            g_hakmem_lock_depth--;
+            return;
+#endif
         }
-        // No header magic → external pointer (BenchMeta, libc allocation, etc.)
+        // No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.)
         extern void __libc_free(void*);
         ptr_trace_dump_now("wrap_libc_external_nomag");
         __libc_free(ptr);
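
Net effect of the free() hunks, once classify_ptr and the Mid-registry failsafe have both declined: a 0xA0 header is honored only when a registered Superslab backs it, an unregistered 0xA0 pointer is deliberately swallowed rather than forwarded anywhere, and a 0xB0 header routes to hak_free_at only when Pool TLS is compiled in. Condensed sketch (hypothetical helper, not verbatim source; assumes the project declarations from the hunks above):

    // Sketch of the post-patch header fallback in the free() wrapper.
    static void free_header_fallback(void* ptr) {
        uint8_t magic = *((uint8_t*)ptr - 1) & 0xF0;  // caller ruled out page-aligned ptr
        if (magic == 0xA0) {                          // Tiny candidate
            SuperSlab* ss = hak_super_lookup(ptr);
            if (ss && ss->magic == SUPERSLAB_MAGIC) {
                hak_free_at(ptr, 0, HAK_CALLSITE());
                return;
            }
            return;  // unregistered: the byte match may be coincidental; drop it
        }
    #ifdef HAKMEM_POOL_TLS_PHASE1
        if (magic == 0xB0) {                          // Pool TLS candidate
            hak_free_at(ptr, 0, HAK_CALLSITE());
            return;
        }
    #endif
        extern void __libc_free(void*);
        __libc_free(ptr);                             // external pointer
    }

Swallowing the unregistered 0xA0 case trades a possible leak for safety: neither hak_free_at nor __libc_free can be trusted with a pointer that merely happens to be preceded by the magic byte.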

View File

@@ -16,10 +16,17 @@ void* hak_pool_try_alloc(size_t size, uintptr_t site_id) {
     // Debug for 33-41KB allocations
     if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] hak_pool_try_alloc: size=%zu (after init)\n", size); }
-    // P1.7 approach: Avoid using pool during ALL wrapper calls (conservative but safe)
+    // P1.7 guard: allow pool by default even when called from wrappers.
+    // Only block if explicitly disabled via env or during nested recursion.
     extern int hak_in_wrapper(void);
-    if (hak_in_wrapper() && !g_wrap_l2_enabled) {
-        if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", hak_in_wrapper(), g_wrap_l2_enabled); }
+    extern __thread int g_hakmem_lock_depth;
+    int in_wrapper = hak_in_wrapper();
+    if (in_wrapper && g_hakmem_lock_depth > 1) {
+        if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: nested wrapper depth=%d\n", g_hakmem_lock_depth); }
+        return NULL;
+    }
+    if (in_wrapper && !g_wrap_l2_enabled) {
+        if (size >= 33000 && size <= 41000) { HAKMEM_LOG("[Pool] REJECTED: in_wrapper=%d, wrap_l2=%d\n", in_wrapper, g_wrap_l2_enabled); }
         return NULL;
     }
     if (!hak_pool_is_poolable(size)) {
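
The reworked guard splits the old blanket rejection in two: depth > 1 means the wrapper re-entered itself and stays blocked unconditionally, while a plain top-level wrapper call is now admitted unless HAKMEM_WRAP_L2 explicitly disables it (see the init hunk below). As a sketch (hypothetical helper; assumes, per the malloc() hunk above, that the wrapper increments g_hakmem_lock_depth on entry, so a normal wrapped call sees depth == 1):

    // Sketch of the new pool admission rule.
    extern int hak_in_wrapper(void);
    extern __thread int g_hakmem_lock_depth;
    extern int g_wrap_l2_enabled;

    static int pool_admits_call(void) {
        if (!hak_in_wrapper()) return 1;        // internal caller: always allowed
        if (g_hakmem_lock_depth > 1) return 0;  // nested wrapper re-entry: blocked
        return g_wrap_l2_enabled;               // top-level wrapper call: env-gated
    }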

View File

@@ -52,7 +52,9 @@ static void hak_pool_init_impl(void) {
     const char* e_tls = getenv("HAKMEM_POOL_TLS_FREE");
     g_pool.tls_free_enabled = (e_tls == NULL) ? 1 : (atoi(e_tls) != 0);
     const char* e_wrap = getenv("HAKMEM_WRAP_L2");
-    g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
+    if (e_wrap) {
+        g_wrap_l2_enabled = (atoi(e_wrap) != 0);
+    }
     const char* e_minb = getenv("HAKMEM_POOL_MIN_BUNDLE");
     if (e_minb) { int v = atoi(e_minb); if (v >= 1 && v <= 8) g_pool_min_bundle = v; }
     const char* e_mix = getenv("HAKMEM_SHARD_MIX");
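
This last change is small but load-bearing for the wrapper guard above: previously an unset HAKMEM_WRAP_L2 forced g_wrap_l2_enabled to 0, so wrapper calls could never reach the pool by default; now an unset variable leaves the compiled-in default untouched and only an explicit value overrides it. A minimal illustration (the default of 1 is an assumption consistent with the try_alloc comment, not visible in this diff):

    #include <stdlib.h>

    int g_wrap_l2_enabled = 1;  // assumed compiled-in default (set elsewhere in the commit)

    static void wrap_l2_init_sketch(void) {
        const char* e_wrap = getenv("HAKMEM_WRAP_L2");
        // Before: g_wrap_l2_enabled = (e_wrap && atoi(e_wrap) != 0) ? 1 : 0;
        //         (an unset env var silently disabled the flag)
        if (e_wrap) {
            g_wrap_l2_enabled = (atoi(e_wrap) != 0);  // explicit env value overrides only
        }
    }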