Perf: getenv ホットパスボトルネック削除 (8.51% → 0%)

**問題:**

perf で発見:
- `getenv()`: 8.51% CPU on malloc hot path
- malloc 内で `getenv("HAKMEM_SFC_DEBUG")` が毎回実行
- getenv は環境変数の線形走査 → 非常に重い

**修正内容:**
1. `malloc()`: HAKMEM_SFC_DEBUG を初回のみ getenv して cache (Line 48-52)
2. `malloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 75-79)
3. `calloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 120-124)

**効果:**
- getenv CPU: 8.51% → 0% ✅
- superslab_refill: 10.30% → 9.61% (-7%)
- hak_tiny_alloc_slow が新トップ: 9.61%

**スループット:**
- 4,192,132 ops/s (変化なし)
- 理由: Syscall Saturation (86.7% kernel time) が支配的
- 次: SuperSlab Caching で syscall 90% 削減 → +100-150% 期待

**Perf結果 (before/after):**
```
Before: getenv 8.51% | superslab_refill 10.30%
After:  getenv 0%    | hak_tiny_alloc_slow 9.61% | superslab_refill 9.61%
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:15:28 +09:00
parent db833142f1
commit f454d35ea4
1 changed files with 21 additions and 7 deletions
--- a/core/box/hak_wrappers.inc.h
+++ b/core/box/hak_wrappers.inc.h
@@ -44,8 +44,13 @@ void* malloc(size_t size) {
        return __libc_malloc(size);
    }
    // Cache getenv result to avoid 8.51% CPU overhead on hot path
    static _Atomic int debug_enabled = -1;  // -1 = uninitialized
    static _Atomic int debug_count = 0;
-    if (getenv("HAKMEM_SFC_DEBUG") && debug_count < 100) {
+    if (__builtin_expect(debug_enabled < 0, 0)) {
        debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0;
    }
    if (debug_enabled && debug_count < 100) {
        int n = atomic_fetch_add(&debug_count, 1);
        if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
    }
@@ -66,9 +71,13 @@ void* malloc(size_t size) {
            extern void* __libc_malloc(size_t);
            return __libc_malloc(size);
        }
        // Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
        static _Atomic int ld_safe_mode = -1;  // -1 = uninitialized
        if (__builtin_expect(ld_safe_mode < 0, 0)) {
            const char* lds = getenv("HAKMEM_LD_SAFE");
-        int mode = (lds ? atoi(lds) : 1);
+            ld_safe_mode = (lds ? atoi(lds) : 1);
-        if (mode >= 2 || size > TINY_MAX_SIZE) {
+        }
        if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) {
            extern void* __libc_malloc(size_t);
            return __libc_malloc(size);
        }
@@ -106,10 +115,15 @@ void* calloc(size_t nmemb, size_t size) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
        if (!g_initialized) { hak_init(); }
        if (g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
        // Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead)
        // For now, duplicate the caching logic
        static _Atomic int ld_safe_mode_calloc = -1;
        if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
            const char* lds = getenv("HAKMEM_LD_SAFE");
-        int mode = (lds ? atoi(lds) : 1);
+            ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
        }
        size_t total = nmemb * size;
-        if (mode >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
+        if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
    }
    g_hakmem_lock_depth++;
    size_t total_size = nmemb * size;