|
|
|
#define _GNU_SOURCE

#include "wrapper_env_box.h"

#include <errno.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
|
|
|
|
|
|
|
|
|
// Process-wide cached ENV configuration. Defaults chosen so the wrappers are
// safe before init: everything off except ld_safe_mode (conservative default 1).
// inited flips to 1 exactly once in wrapper_env_init_once() (release store).
wrapper_env_cfg_t g_wrapper_env = {.inited = 0, .step_trace = 0, .ld_safe_mode = 1, .free_wrap_trace = 0, .wrap_diag = 0, .wrap_shape = 0};
|
|
|
|
|
|
|
|
|
// Read a boolean knob from the environment.
// Unset or empty variables yield `def`; otherwise any value whose first
// character is not '0' counts as "on" (e.g. "1", "yes", "true").
static inline int env_flag(const char* name, int def) {
    const char* value = getenv(name);
    if (value == NULL || value[0] == '\0') {
        return def;
    }
    return (value[0] == '0') ? 0 : 1;
}
|
|
|
|
|
|
|
|
|
|
// Read an integer knob from the environment.
// Unset, empty, or non-numeric values yield `def`. Out-of-range values are
// clamped to INT_MIN/INT_MAX instead of being silently truncated (the old
// (int)val cast could wrap a huge value into an arbitrary small one).
static inline int env_int(const char* name, int def) {
    const char* e = getenv(name);
    if (!e || *e == '\0') return def;

    char* end;
    errno = 0;
    long val = strtol(e, &end, 10);
    if (end == e) return def;  // no digits consumed at all

    // ERANGE (strtol saturated) or a long that does not fit an int: clamp.
    if (errno == ERANGE || val > INT_MAX || val < INT_MIN) {
        return (val > 0) ? INT_MAX : INT_MIN;
    }
    return (int)val;
}
|
|
|
|
|
|
|
|
|
|
void wrapper_env_init_once(void) {
|
|
|
|
|
// Atomic CAS to ensure exactly-once initialization
|
|
|
|
|
static _Atomic int init_started = 0;
|
|
|
|
|
int expected = 0;
|
|
|
|
|
|
|
|
|
|
if (!atomic_compare_exchange_strong_explicit(&init_started, &expected, 1,
|
|
|
|
|
memory_order_acq_rel,
|
|
|
|
|
memory_order_relaxed)) {
|
|
|
|
|
// Someone else is initializing or already initialized
|
|
|
|
|
// Spin until they're done
|
|
|
|
|
while (!__builtin_expect(g_wrapper_env.inited, 1)) {
|
|
|
|
|
__builtin_ia32_pause();
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// We own the initialization
|
|
|
|
|
g_wrapper_env.step_trace = env_flag("HAKMEM_STEP_TRACE", 0);
|
|
|
|
|
g_wrapper_env.ld_safe_mode = env_int("HAKMEM_LD_SAFE", 1);
|
|
|
|
|
g_wrapper_env.free_wrap_trace = env_flag("HAKMEM_FREE_WRAP_TRACE", 0);
|
feat(Phase 2-1): Lane Classification + Fallback Reduction
## Phase 2-1: Lane Classification Box (Single Source of Truth)
### New Module: hak_lane_classify.inc.h
- Centralized size-to-lane mapping with unified boundary definitions
- Lane architecture:
- LANE_TINY: [0, 1024B] SuperSlab (unchanged)
- LANE_POOL: [1025, 52KB] Pool per-thread (extended!)
- LANE_ACE: [52KB, 2MB] ACE learning
- LANE_HUGE: [2MB+] mmap direct
- Key invariant: POOL_MIN = TINY_MAX + 1 (no gaps)
### Fixed: Tiny/Pool Boundary Mismatch
- Before: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!)
- After: Both reference LANE_TINY_MAX=1024 (authoritative)
- Impact: Eliminates 1025-2047B "unmanaged zone" causing libc fragmentation
### Updated Files
- core/hakmem_tiny.h: Use LANE_TINY_MAX, fix sizes[7]=1024 (was 2047)
- core/hakmem_pool.h: Use POOL_MIN_REQUEST_SIZE=1025 (was 2048)
- core/box/hak_alloc_api.inc.h: Lane-based routing (HAK_LANE_IS_*)
## jemalloc Block Bug Fix
### Root Cause
- g_jemalloc_loaded initialized to -1 (unknown)
- Condition `if (block && g_jemalloc_loaded)` treated -1 as true
- Result: ALL allocations fallback to libc (even when jemalloc not loaded!)
### Fix
- Change condition to `g_jemalloc_loaded > 0`
- Only fallback when jemalloc is ACTUALLY loaded
- Applied to: malloc/free/calloc/realloc
### Impact
- Before: 100% libc fallback (jemalloc block false positive)
- After: Only genuine cases fallback (init_wait, lockdepth, etc.)
## Fallback Diagnostics (ChatGPT contribution)
### New Feature: HAKMEM_WRAP_DIAG
- ENV flag to enable fallback logging
- Reason-specific counters (init_wait, jemalloc_block, lockdepth, etc.)
- First 4 occurrences logged per reason
- Helps identify unwanted fallback paths
### Implementation
- core/box/wrapper_env_box.{c,h}: ENV cache + DIAG flag
- core/box/hak_wrappers.inc.h: wrapper_record_fallback() calls
## Verification
### Fallback Reduction
- Before fix: [wrap] libc malloc: jemalloc block (100% fallback)
- After fix: Only init_wait + lockdepth (expected, minimal)
### Known Issue
- Tiny allocator OOM (size=8) still crashes
- This is a pre-existing bug, unrelated to Phase 2-1
- Was hidden by jemalloc block false positive
- Will be investigated separately
## Performance Impact
### sh8bench 8 threads
- Phase 1-1: 15秒
- Phase 2-1: 14秒 (~7% improvement)
### Note
- True hakmem performance now measurable (no more 100% fallback)
- Tiny OOM prevents full benchmark completion
- Next: Fix Tiny allocator for complete evaluation
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
Co-Authored-By: ChatGPT <chatgpt@openai.com>
2025-12-02 19:13:28 +09:00
|
|
|
g_wrapper_env.wrap_diag = env_flag("HAKMEM_WRAP_DIAG", 0);
|
2025-12-13 16:46:18 +09:00
|
|
|
g_wrapper_env.wrap_shape = env_flag("HAKMEM_WRAP_SHAPE", 0);
|
2025-12-02 16:16:51 +09:00
|
|
|
|
|
|
|
|
// Mark as initialized last with memory barrier
|
|
|
|
|
atomic_store_explicit(&g_wrapper_env.inited, 1, memory_order_release);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
void wrapper_env_refresh_from_env(void) {
|
|
|
|
|
// Ensure base init happened (constructor may have already done this)
|
|
|
|
|
if (!g_wrapper_env.inited) {
|
|
|
|
|
wrapper_env_init_once();
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Re-read ENV (bench_profile may have injected defaults via putenv)
|
|
|
|
|
g_wrapper_env.step_trace = env_flag("HAKMEM_STEP_TRACE", 0);
|
|
|
|
|
g_wrapper_env.ld_safe_mode = env_int("HAKMEM_LD_SAFE", 1);
|
|
|
|
|
g_wrapper_env.free_wrap_trace = env_flag("HAKMEM_FREE_WRAP_TRACE", 0);
|
|
|
|
|
g_wrapper_env.wrap_diag = env_flag("HAKMEM_WRAP_DIAG", 0);
|
|
|
|
|
g_wrapper_env.wrap_shape = env_flag("HAKMEM_WRAP_SHAPE", 0);
|
|
|
|
|
|
|
|
|
|
atomic_store_explicit(&g_wrapper_env.inited, 1, memory_order_release);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
__attribute__((constructor(101)))
|
|
|
|
|
static void wrapper_env_ctor(void) {
|
|
|
|
|
// Constructor priority 101 runs early (libc uses 100+)
|
|
|
|
|
// This ensures initialization before any malloc calls
|
|
|
|
|
if (!g_wrapper_env.inited) {
|
|
|
|
|
wrapper_env_init_once();
|
|
|
|
|
}
|
|
|
|
|
}
|