Perf: getenv ホットパスボトルネック削除 (8.51% → 0%)

**問題:**
perf で発見:
- `getenv()`: 8.51% CPU on malloc hot path
- malloc 内で `getenv("HAKMEM_SFC_DEBUG")` が呼び出しのたびに実行されていた (`getenv("HAKMEM_LD_SAFE")` も malloc/calloc の該当経路で同様に毎回実行)
- getenv は環境変数の線形走査 → 非常に重い

**修正内容:**
1. `malloc()`: HAKMEM_SFC_DEBUG を初回のみ getenv して cache (Line 48-52)
2. `malloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 75-79)
3. `calloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 120-124)

**効果:**
- getenv CPU: 8.51% → 0% 
- superslab_refill: 10.30% → 9.61% (-0.69pt、相対で約 -7%)
- hak_tiny_alloc_slow が新トップ: 9.61%

**スループット:**
- 4,192,132 ops/s (変化なし)
- 理由: Syscall Saturation (86.7% kernel time) が支配的
- 次のステップ: SuperSlab Caching で syscall を 90% 削減し、スループット +100〜150% を期待

**Perf結果 (before/after):**
```
Before:  getenv 8.51% | superslab_refill 10.30%
After:   getenv 0%    | hak_tiny_alloc_slow 9.61% | superslab_refill 9.61%
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-07 01:15:28 +09:00
parent db833142f1
commit f454d35ea4

View File

@@ -44,8 +44,13 @@ void* malloc(size_t size) {
return __libc_malloc(size); return __libc_malloc(size);
} }
// Cache getenv result to avoid 8.51% CPU overhead on hot path
static _Atomic int debug_enabled = -1; // -1 = uninitialized
static _Atomic int debug_count = 0; static _Atomic int debug_count = 0;
if (getenv("HAKMEM_SFC_DEBUG") && debug_count < 100) { if (__builtin_expect(debug_enabled < 0, 0)) {
debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0;
}
if (debug_enabled && debug_count < 100) {
int n = atomic_fetch_add(&debug_count, 1); int n = atomic_fetch_add(&debug_count, 1);
if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size); if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
} }
@@ -66,9 +71,13 @@ void* malloc(size_t size) {
extern void* __libc_malloc(size_t); extern void* __libc_malloc(size_t);
return __libc_malloc(size); return __libc_malloc(size);
} }
// Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
static _Atomic int ld_safe_mode = -1; // -1 = uninitialized
if (__builtin_expect(ld_safe_mode < 0, 0)) {
const char* lds = getenv("HAKMEM_LD_SAFE"); const char* lds = getenv("HAKMEM_LD_SAFE");
int mode = (lds ? atoi(lds) : 1); ld_safe_mode = (lds ? atoi(lds) : 1);
if (mode >= 2 || size > TINY_MAX_SIZE) { }
if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) {
extern void* __libc_malloc(size_t); extern void* __libc_malloc(size_t);
return __libc_malloc(size); return __libc_malloc(size);
} }
@@ -106,10 +115,15 @@ void* calloc(size_t nmemb, size_t size) {
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
if (!g_initialized) { hak_init(); } if (!g_initialized) { hak_init(); }
if (g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
// Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead)
// For now, duplicate the caching logic
static _Atomic int ld_safe_mode_calloc = -1;
if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
const char* lds = getenv("HAKMEM_LD_SAFE"); const char* lds = getenv("HAKMEM_LD_SAFE");
int mode = (lds ? atoi(lds) : 1); ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
}
size_t total = nmemb * size; size_t total = nmemb * size;
if (mode >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); } if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
} }
g_hakmem_lock_depth++; g_hakmem_lock_depth++;
size_t total_size = nmemb * size; size_t total_size = nmemb * size;