Files
hakmem/core/box/hak_wrappers.inc.h
Moe Charm (CI) f454d35ea4 Perf: getenv ホットパスボトルネック削除 (8.51% → 0%)
**問題:**
perf で発見:
- `getenv()`: 8.51% CPU on malloc hot path
- malloc 内で `getenv("HAKMEM_SFC_DEBUG")` が毎回実行
- getenv は環境変数の線形走査 → 非常に重い

**修正内容:**
1. `malloc()`: HAKMEM_SFC_DEBUG を初回のみ getenv して cache (Line 48-52)
2. `malloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 75-79)
3. `calloc()`: HAKMEM_LD_SAFE を初回のみ getenv して cache (Line 120-124)

**効果:**
- getenv CPU: 8.51% → 0% 
- superslab_refill: 10.30% → 9.61% (-7%)
- hak_tiny_alloc_slow が新トップ: 9.61%

**スループット:**
- 4,192,132 ops/s (変化なし)
- 理由: Syscall Saturation (86.7% kernel time) が支配的
- 次: SuperSlab Caching で syscall 90% 削減 → +100-150% 期待

**Perf結果 (before/after):**
```
Before:  getenv 8.51% | superslab_refill 10.30%
After:   getenv 0%    | hak_tiny_alloc_slow 9.61% | superslab_refill 9.61%
```

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-07 01:15:28 +09:00

159 lines
6.3 KiB
C

// hak_wrappers.inc.h — malloc/free/calloc/realloc wrappers (LD_PRELOAD-aware)
#ifndef HAK_WRAPPERS_INC_H
#define HAK_WRAPPERS_INC_H
#ifdef HAKMEM_FORCE_LIBC_ALLOC_BUILD
// Sanitizer/diagnostic builds: bypass hakmem allocator completely.
// Forced-libc build: malloc() is a plain pass-through to the C library's
// __libc_malloc entry point; no hakmem code is involved.
void* malloc(size_t size) {
    extern void* __libc_malloc(size_t);
    void* block = __libc_malloc(size);
    return block;
}
// Forced-libc build: free() forwards non-NULL pointers to __libc_free and
// does nothing for NULL.
void free(void* ptr) {
    extern void __libc_free(void*);
    if (ptr != NULL) {
        __libc_free(ptr);
    }
}
// Forced-libc build: calloc() is a plain pass-through to __libc_calloc.
void* calloc(size_t nmemb, size_t size) {
    extern void* __libc_calloc(size_t, size_t);
    void* block = __libc_calloc(nmemb, size);
    return block;
}
// Forced-libc build: realloc() is a plain pass-through to __libc_realloc.
void* realloc(void* ptr, size_t size) {
    extern void* __libc_realloc(void*, size_t);
    void* resized = __libc_realloc(ptr, size);
    return resized;
}
#else
// malloc wrapper - intercepts system malloc() calls
// Thread-local (__thread) 64-bit counters, each starting at zero.
// NOTE(review): none of the wrappers visible in this header read or update
// them; the names suggest malloc-path statistics maintained by other code —
// confirm against the translation unit that includes this file.
__thread uint64_t g_malloc_total_calls = 0;
__thread uint64_t g_malloc_tiny_size_match = 0;
__thread uint64_t g_malloc_fast_path_tried = 0;
__thread uint64_t g_malloc_fast_path_null = 0;
__thread uint64_t g_malloc_slow_path = 0;
// Thread-local array of TINY_NUM_CLASSES pointers, defined elsewhere.
// NOTE(review): presumably per-size-class list heads; it is not referenced by
// the wrappers visible in this header — verify it is still needed here.
extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES];
/*
 * malloc() interposer.
 *
 * Decides, per call, whether the request is served by the C library
 * (__libc_malloc) or by hakmem (hak_alloc_at). The request goes to libc when:
 *   - hakmem is still initializing (g_initializing != 0),
 *   - the call is nested inside another hakmem wrapper
 *     (g_hakmem_lock_depth > 0),
 *   - hak_force_libc_alloc() is true,
 *   - LD mode is active (hak_ld_env_mode() != 0) and either jemalloc blocking
 *     applies, HAKMEM_LD_SAFE >= 2, or size > TINY_MAX_SIZE.
 * Otherwise the allocation is made by hak_alloc_at() with the lock depth
 * raised for the duration of the call.
 *
 * Environment (each read once and cached in a function-local static):
 *   HAKMEM_SFC_DEBUG - if set, the first 20 calls are logged to stderr.
 *   HAKMEM_LD_SAFE   - integer parsed with atoi(); defaults to 1 when unset.
 *
 * Returns the allocated block, or whatever the selected backend returns on
 * failure.
 */
void* malloc(size_t size) {
    extern void* __libc_malloc(size_t);

    // Guard against recursion during initialization FIRST!
    if (__builtin_expect(g_initializing != 0, 0)) {
        return __libc_malloc(size);
    }

    // Re-entrancy guard, matching free()/calloc()/realloc(): while the lock
    // depth is raised we are already inside a hakmem wrapper, so a nested
    // request must be served by libc instead of re-entering hak_alloc_at().
    // Without this, a nested malloc() returned a hakmem block that a nested
    // free() (which does check the depth) would then hand to __libc_free.
    if (__builtin_expect(g_hakmem_lock_depth > 0, 0)) {
        return __libc_malloc(size);
    }

    // Cache getenv result to avoid 8.51% CPU overhead on hot path
    static _Atomic int debug_enabled = -1; // -1 = uninitialized
    static _Atomic int debug_count = 0;
    if (__builtin_expect(debug_enabled < 0, 0)) {
        debug_enabled = (getenv("HAKMEM_SFC_DEBUG") != NULL) ? 1 : 0;
    }
    // The counter is only bumped while it reads below 100; only the first 20
    // increments actually print.
    if (debug_enabled && debug_count < 100) {
        int n = atomic_fetch_add(&debug_count, 1);
        if (n < 20) fprintf(stderr, "[SFC_DEBUG] malloc(%zu)\n", size);
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        return __libc_malloc(size);
    }

    int ld_mode = hak_ld_env_mode();
    if (ld_mode) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            return __libc_malloc(size);
        }
        if (!g_initialized) { hak_init(); }
        // hak_init() may still be in progress; stay on libc until it is done.
        if (g_initializing) {
            return __libc_malloc(size);
        }
        // Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path
        static _Atomic int ld_safe_mode = -1; // -1 = uninitialized
        if (__builtin_expect(ld_safe_mode < 0, 0)) {
            const char* lds = getenv("HAKMEM_LD_SAFE");
            ld_safe_mode = (lds ? atoi(lds) : 1);
        }
        // In LD mode hakmem only takes requests up to TINY_MAX_SIZE, and
        // none at all when the safe level is 2 or higher.
        if (ld_safe_mode >= 2 || size > TINY_MAX_SIZE) {
            return __libc_malloc(size);
        }
    }

    // Raise the depth so nested wrapper calls made during the allocation
    // are routed to libc.
    g_hakmem_lock_depth++;
    void* ptr = hak_alloc_at(size, HAK_CALLSITE());
    g_hakmem_lock_depth--;
    return ptr;
}
/*
 * free() interposer.
 *
 * Every call (including free(NULL)) bumps g_free_wrapper_calls. A non-NULL
 * pointer is handed to __libc_free when the call is nested inside a hakmem
 * wrapper, hakmem is initializing, libc allocation is forced, or LD mode
 * decides to stay on libc; otherwise it is released through hak_free_at()
 * with the lock depth raised for the duration of the call.
 */
void free(void* ptr) {
    extern void __libc_free(void*);

    atomic_fetch_add_explicit(&g_free_wrapper_calls, 1, memory_order_relaxed);

    if (ptr == NULL) {
        return;
    }

    // Early exits to libc, evaluated in this exact order: nested wrapper
    // call, initialization in progress, forced-libc switch.
    if (g_hakmem_lock_depth > 0 ||
        __builtin_expect(g_initializing != 0, 0) ||
        __builtin_expect(hak_force_libc_alloc(), 0)) {
        __libc_free(ptr);
        return;
    }

    if (hak_ld_env_mode()) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            __libc_free(ptr);
            return;
        }
        if (!g_initialized) {
            hak_init();
        }
        // Initialization may still be running after hak_init(); use libc.
        if (g_initializing) {
            __libc_free(ptr);
            return;
        }
    }

    g_hakmem_lock_depth++;
    hak_free_at(ptr, 0, HAK_CALLSITE());
    g_hakmem_lock_depth--;
}
/*
 * calloc() interposer.
 *
 * Nested or during-initialization calls go straight to __libc_calloc. After
 * that, an nmemb * size overflow fails with errno = ENOMEM and NULL. The
 * request is then served by libc when libc allocation is forced or when LD
 * mode keeps it there (jemalloc blocking, initialization still running,
 * HAKMEM_LD_SAFE >= 2, or a total above TINY_MAX_SIZE). Otherwise the block
 * comes from hak_alloc_at() and is zero-filled here.
 *
 * HAKMEM_LD_SAFE is read once and cached in a function-local static that is
 * separate from the one malloc() keeps; it defaults to 1 when unset.
 */
void* calloc(size_t nmemb, size_t size) {
    extern void* __libc_calloc(size_t, size_t);

    if (g_hakmem_lock_depth > 0 || __builtin_expect(g_initializing != 0, 0)) {
        return __libc_calloc(nmemb, size);
    }

    // Reject products that do not fit in size_t.
    if (size != 0 && nmemb > (SIZE_MAX / size)) {
        errno = ENOMEM;
        return NULL;
    }

    if (__builtin_expect(hak_force_libc_alloc(), 0)) {
        return __libc_calloc(nmemb, size);
    }

    // Safe to multiply: the overflow check above has already passed.
    const size_t bytes = nmemb * size;

    if (hak_ld_env_mode()) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            return __libc_calloc(nmemb, size);
        }
        if (!g_initialized) {
            hak_init();
        }
        if (g_initializing) {
            return __libc_calloc(nmemb, size);
        }
        // One-time lookup of HAKMEM_LD_SAFE (-1 means "not read yet").
        static _Atomic int cached_ld_safe = -1;
        if (__builtin_expect(cached_ld_safe < 0, 0)) {
            const char* env = getenv("HAKMEM_LD_SAFE");
            cached_ld_safe = (env != NULL) ? atoi(env) : 1;
        }
        if (cached_ld_safe >= 2 || bytes > TINY_MAX_SIZE) {
            return __libc_calloc(nmemb, size);
        }
    }

    g_hakmem_lock_depth++;
    void* zeroed = hak_alloc_at(bytes, HAK_CALLSITE());
    if (zeroed != NULL) {
        memset(zeroed, 0, bytes);
    }
    g_hakmem_lock_depth--;
    return zeroed;
}
/*
 * realloc() interposer.
 *
 * Nested calls, calls during initialization, forced-libc mode and the LD-mode
 * libc cases are forwarded to __libc_realloc. Otherwise:
 *   - realloc(NULL, size) behaves as malloc(size);
 *   - realloc(ptr, 0) frees ptr and returns NULL;
 *   - anything else allocates a new block with malloc(), copies `size` bytes
 *     from the old block, frees the old block and returns the new one. If the
 *     new allocation fails, NULL is returned and the old block is untouched.
 */
void* realloc(void* ptr, size_t size) {
    extern void* __libc_realloc(void*, size_t);

    // Early exits to libc, evaluated in this exact order: nested wrapper
    // call, initialization in progress, forced-libc switch.
    if (g_hakmem_lock_depth > 0 ||
        __builtin_expect(g_initializing != 0, 0) ||
        __builtin_expect(hak_force_libc_alloc(), 0)) {
        return __libc_realloc(ptr, size);
    }

    if (hak_ld_env_mode()) {
        if (hak_ld_block_jemalloc() && hak_jemalloc_loaded()) {
            return __libc_realloc(ptr, size);
        }
        if (!g_initialized) {
            hak_init();
        }
        if (g_initializing) {
            return __libc_realloc(ptr, size);
        }
    }

    if (ptr == NULL) {
        return malloc(size);
    }
    if (size == 0) {
        free(ptr);
        return NULL;
    }

    void* replacement = malloc(size);
    if (replacement == NULL) {
        return NULL;
    }
    // NOTE(review): this copies the NEW size out of the OLD block. When the
    // request grows the allocation, the read runs past the end of the old
    // block. The old block's size is not available here; fixing this needs a
    // usable-size query from the allocator — confirm and address.
    memcpy(replacement, ptr, size);
    free(ptr);
    return replacement;
}
#endif // HAKMEM_FORCE_LIBC_ALLOC_BUILD
#endif // HAK_WRAPPERS_INC_H