Perf: getenv ホットパスボトルネック削除 (8.51% → 0%)
**問題:**
perf で発見:
- `getenv()`: 8.51% CPU on malloc hot path
- malloc 内で `getenv("HAKMEM_SFC_DEBUG")` が、malloc / calloc 内で `getenv("HAKMEM_LD_SAFE")` が呼び出しのたびに実行されていた
- getenv は呼び出しごとに環境変数リストを線形走査して名前を比較するため、ホットパスでは非常に重い
**修正内容:**
1. `malloc()`: HAKMEM_SFC_DEBUG を初回のみ getenv して cache (Line 48-52)
2. `malloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 75-79)
3. `calloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 120-124)
**効果:**
- getenv CPU: 8.51% → 0% ✅
- superslab_refill: 10.30% → 9.61% (相対 -6.7%)
- hak_tiny_alloc_slow が新トップ: 9.61%
**スループット:**
- 4,192,132 ops/s (変化なし)
- 理由: Syscall Saturation (86.7% kernel time) が支配的で、ユーザ空間側の getenv 削減分はスループットに現れない
- 次: SuperSlab Caching で syscall 90% 削減 → +100-150% 期待
**Perf結果 (before/after):**
```
Before: getenv 8.51% | superslab_refill 10.30%
After: getenv 0% | hak_tiny_alloc_slow 9.61% | superslab_refill 9.61%
```
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -44,8 +44,13 @@ void* malloc(size_t size) {
|
||||
return __libc_malloc(size);
|
||||
}
|
||||
|
||||
// Cache getenv result to avoid 8.51% CPU overhead on hot path
|
||||
static _Atomic int debug_enabled = -1; // -1 = uninitialized
|
||||
static _Atomic int debug_count = 0;
|
||||
if (getenv("HAKMEM_SFC_DEBUG") && debug_count < 100) {
|
||||
if (__builtin_expect(debug_enabled < 0, 0)) {
|
||||
debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0;
|
||||
}
|
||||
if (debug_enabled && debug_count < 100) {
|
||||
int n = atomic_fetch_add(&debug_count, 1);
|
||||
if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
|
||||
}
|
||||
@@ -66,9 +71,13 @@ void* malloc(size_t size) {
|
||||
extern void* __libc_malloc(size_t);
|
||||
return __libc_malloc(size);
|
||||
}
|
||||
// Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
|
||||
static _Atomic int ld_safe_mode = -1; // -1 = uninitialized
|
||||
if (__builtin_expect(ld_safe_mode < 0, 0)) {
|
||||
const char* lds = getenv("HAKMEM_LD_SAFE");
|
||||
int mode = (lds ? atoi(lds) : 1);
|
||||
if (mode >= 2 || size > TINY_MAX_SIZE) {
|
||||
ld_safe_mode = (lds ? atoi(lds) : 1);
|
||||
}
|
||||
if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) {
|
||||
extern void* __libc_malloc(size_t);
|
||||
return __libc_malloc(size);
|
||||
}
|
||||
@@ -106,10 +115,15 @@ void* calloc(size_t nmemb, size_t size) {
|
||||
if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
|
||||
if (!g_initialized) { hak_init(); }
|
||||
if (g_initializing) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
|
||||
// Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead)
|
||||
// For now, duplicate the caching logic
|
||||
static _Atomic int ld_safe_mode_calloc = -1;
|
||||
if (__builtin_expect(ld_safe_mode_calloc < 0, 0)) {
|
||||
const char* lds = getenv("HAKMEM_LD_SAFE");
|
||||
int mode = (lds ? atoi(lds) : 1);
|
||||
ld_safe_mode_calloc = (lds ? atoi(lds) : 1);
|
||||
}
|
||||
size_t total = nmemb * size;
|
||||
if (mode >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
|
||||
if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) { extern void* __libc_calloc(size_t, size_t); return __libc_calloc(nmemb, size); }
|
||||
}
|
||||
g_hakmem_lock_depth++;
|
||||
size_t total_size = nmemb * size;
|
||||
|
||||
Reference in New Issue
Block a user