From f454d35ea415d65c25ecf9b1b7acc62179d7f690 Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Fri, 7 Nov 2025 01:15:28 +0900 Subject: [PATCH] =?UTF-8?q?Perf:=20getenv=20=E3=83=9B=E3=83=83=E3=83=88?= =?UTF-8?q?=E3=83=91=E3=82=B9=E3=83=9C=E3=83=88=E3=83=AB=E3=83=8D=E3=83=83?= =?UTF-8?q?=E3=82=AF=E5=89=8A=E9=99=A4=20(8.51%=20=E2=86=92=200%)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **問題:** perf で発見: - `getenv()`: 8.51% CPU on malloc hot path - malloc 内で `getenv("HAKMEM_SFC_DEBUG")` が毎回実行 - getenv は環境変数の線形走査 → 非常に重い **修正内容:** 1. `malloc()`: HAKMEM_SFC_DEBUG を初回のみ getenv して cache (Line 48-52) 2. `malloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 75-79) 3. `calloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 120-124) **効果:** - getenv CPU: 8.51% → 0% ✅ - superslab_refill: 10.30% → 9.61% (-7%) - hak_tiny_alloc_slow が新トップ: 9.61% **スループット:** - 4,192,132 ops/s (変化なし) - 理由: Syscall Saturation (86.7% kernel time) が支配的 - 次: SuperSlab Caching で syscall 90% 削減 → +100-150% 期待 **Perf結果 (before/after):** ``` Before: getenv 8.51% | superslab_refill 10.30% After: getenv 0% | hak_tiny_alloc_slow 9.61% | superslab_refill 9.61% ``` 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- core/box/hak_wrappers.inc.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h index 375c8580..bfc9563e 100644 --- a/core/box/hak_wrappers.inc.h +++ b/core/box/hak_wrappers.inc.h @@ -44,8 +44,13 @@ void* malloc(size_t size) { return __libc_malloc(size); } + // Cache getenv result to avoid 8.51% CPU overhead on hot path + static _Atomic int debug_enabled = -1; // -1 = uninitialized static _Atomic int debug_count = 0; - if (getenv("HAKMEM_SFC_DEBUG") && debug_count < 100) { + if (__builtin_expect(debug_enabled < 0, 0)) { + debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0; + } + if (debug_enabled && debug_count < 100) { int n = atomic_fetch_add(&debug_count, 1); if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size); } @@ -66,9 +71,13 @@ void* malloc(size_t size) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } - const char* lds = getenv("HAKMEM_LD_SAFE"); - int mode = (lds ? atoi(lds) : 1); - if (mode >= 2 || size > TINY_MAX_SIZE) { + // Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path + static _Atomic int ld_safe_mode = -1; // -1 = uninitialized + if (__builtin_expect(ld_safe_mode < 0, 0)) { + const char* lds = getenv("HAKMEM_LD_SAFE"); + ld_safe_mode = (lds ? atoi(lds) : 1); + } + if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) { extern void* __libc_malloc(size_t); return __libc_malloc(size); } @@ -106,10 +115,15 @@ void* calloc(size_t nmemb, size_t size) { if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (!g_initialized) { hak_init(); } if (g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } - const char* lds = getenv("HAKMEM_LD_SAFE"); - int mode = (lds ? atoi(lds) : 1); + // Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead) + // For now, duplicate the caching logic + static _Atomic int ld_safe_mode_calloc = -1; + if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) { + const char* lds = getenv("HAKMEM_LD_SAFE"); + ld_safe_mode_calloc = (lds ? atoi(lds) : 1); + } size_t total = nmemb * size; - if (mode >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } + if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } } g_hakmem_lock_depth++; size_t total_size = nmemb * size;