feat(Phase 1-2): Add atomic initialization wait mechanism (safety improvement)

Implements thread-safe atomic tracking of the initialization state and adds a
wait helper so that non-init threads wait for initialization to complete
instead of immediately falling back to libc during the initialization window.

Changes:
- Convert g_initializing to _Atomic type for thread-safe access
- Add g_init_thread to identify which thread performs initialization
- Implement hak_init_wait_for_ready() helper with spin/yield mechanism
- Update hak_core_init.inc.h to use atomic operations
- Update hak_wrappers.inc.h to call wait helper instead of checking g_initializing

Results & Analysis:
- Performance: ±0% (21s → 21s, no measurable improvement)
- Safety: ✓ Prevents recursion during the initialization window
- Investigation: Initialization overhead is <1% of total allocations
  - Expected: 2-8% improvement
  - Actual: 0% improvement (spin/yield overhead ≈ savings)
  - libc overhead: 41% → 57% of profiled cycles (a relative increase only, since wall-clock time is unchanged; likely sampling variation)

Key Findings from Perf Analysis:
- getenv: 0% (maintained from Phase 1-1) ✓
- libc malloc/free: ~24.54% of cycles
- libc fragmentation (malloc_consolidate/unlink_chunk): ~16% of cycles
- Total libc overhead: ~41% of cycles (malloc/free plus fragmentation; difficult to reduce without changing the algorithm)

Next Phase Target:
- Phase 2: Investigate libc fragmentation (malloc_consolidate 9.33%, unlink_chunk 6.90%)
- Potential approaches: hakmem Mid/ACE allocator expansion, sh8bench pattern analysis

Recommendation: Keep the Phase 1-2 changes for their safety benefit (they cause no performance regression) and proceed to Phase 2.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-02 16:44:27 +09:00
parent 49969d2e0f
commit 695aec8279
3 changed files with 48 additions and 12 deletions

View File

@ -82,7 +82,8 @@ void* malloc(size_t size) {
if (wcfg->step_trace && size == 33000) write(2, "STEP:1 Lock++\n", 14);
// Guard against recursion during initialization
if (__builtin_expect(g_initializing != 0, 0)) {
int init_wait = hak_init_wait_for_ready();
if (__builtin_expect(init_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (size == 33000) write(2, "RET:Initializing\n", 17);
@ -115,7 +116,8 @@ void* malloc(size_t size) {
return __libc_malloc(size);
}
if (!g_initialized) { hak_init(); }
if (g_initializing) {
int ld_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(ld_init_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_malloc(size_t);
if (wcfg->step_trace && size == 33000) write(2, "RET:Init2\n", 10);
@ -285,7 +287,8 @@ void free(void* ptr) {
__libc_free(ptr);
return;
}
if (__builtin_expect(g_initializing != 0, 0)) {
int free_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(free_init_wait <= 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed);
if (count < 10) {
@ -301,7 +304,8 @@ void free(void* ptr) {
if (hak_ld_env_mode()) {
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_jemalloc"); __libc_free(ptr); return; }
if (!g_initialized) { hak_init(); }
if (g_initializing) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; }
int free_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(free_ld_wait <= 0, 0)) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; }
}
// Phase 15: Box Separation - Domain check to distinguish hakmem vs external pointers
@ -360,7 +364,8 @@ void* calloc(size_t nmemb, size_t size) {
return __libc_calloc(nmemb, size);
}
if (__builtin_expect(g_initializing != 0, 0)) {
int calloc_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(calloc_init_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
return __libc_calloc(nmemb, size);
@ -387,7 +392,8 @@ void* calloc(size_t nmemb, size_t size) {
return __libc_calloc(nmemb, size);
}
if (!g_initialized) { hak_init(); }
if (g_initializing) {
int calloc_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(calloc_ld_wait <= 0, 0)) {
g_hakmem_lock_depth--;
extern void* __libc_calloc(size_t, size_t);
return __libc_calloc(nmemb, size);
@ -416,13 +422,15 @@ void* calloc(size_t nmemb, size_t size) {
void* realloc(void* ptr, size_t size) {
if (g_hakmem_lock_depth > 0) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
if (__builtin_expect(g_initializing != 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
int realloc_init_wait = hak_init_wait_for_ready();
if (__builtin_expect(realloc_init_wait <= 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
int ld_mode = hak_ld_env_mode();
if (ld_mode) {
if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
if (!g_initialized) { hak_init(); }
if (g_initializing) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
int realloc_ld_wait = hak_init_wait_for_ready();
if (__builtin_expect(realloc_ld_wait <= 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); }
}
if (ptr == NULL) { return malloc(size); }
if (size == 0) { free(ptr); return NULL; }