Files
hakmem/core/box/libm_reloc_guard_box.c
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead 軽量化 (+15.34% improvement)
## Summary
- ChatGPT により bench_profile.h の setenv segfault を修正(RTLD_NEXT 経由に切り替え)
- core/box/pool_zero_mode_box.h 新設:ENV キャッシュ経由で ZERO_MODE を統一管理
- core/hakmem_pool.c で zero mode に応じた memset 制御(FULL/header/off)
- A/B テスト結果:ZERO_MODE=header で +15.34% improvement(1M iterations, C6-heavy)

## Files Modified
- core/box/pool_api.inc.h: pool_zero_mode_box.h include
- core/bench_profile.h: glibc setenv → malloc+putenv(segfault 回避)
- core/hakmem_pool.c: zero mode 参照・制御ロジック
- core/box/pool_zero_mode_box.h (新設): enum/getter
- CURRENT_TASK.md: Phase ML1 結果記載

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

191 lines
5.8 KiB
C

// libm_reloc_guard_box.c - Box: libm .fini relocation guard
#include "libm_reloc_guard_box.h"
#include "log_once_box.h"
#include <dlfcn.h>
#include <link.h>
#include <math.h>
#include <stdint.h>
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#if defined(__linux__) && defined(__x86_64__)
// State handed to the dl_iterate_phdr() callback through its `data` pointer.
typedef struct libm_reloc_ctx {
    // Load address of the object to patch; the callback skips every object
    // whose dlpi_addr differs from this value.
    uintptr_t base;
    // Set to 1 by the callback after it has walked the relocation table.
    int patched;
} libm_reloc_ctx_t;
// Log-once gate for the diagnostic line that reports base/slot/raw/relocated.
static hak_log_once_t g_libm_log_once = HAK_LOG_ONCE_INIT;
// Log-once gate shared by the "patch disabled" and "relocated" messages.
static hak_log_once_t g_libm_patch_once = HAK_LOG_ONCE_INIT;
// Log-once gate for failure messages (dladdr failure, relocation failure).
static hak_log_once_t g_libm_fail_once = HAK_LOG_ONCE_INIT;
// Flipped to 1 by the first libm_reloc_guard_run() call that passes the
// enabled check; later calls see 1 and return immediately.
static _Atomic int g_libm_guard_ran = 0;
// Read a boolean switch from the environment.
//   name       - environment variable to look up
//   default_on - value returned when the variable is unset or empty
// Returns 0 when the value starts with '0', 1 for any other non-empty value.
static int libm_reloc_env(const char* name, int default_on) {
    const char* value = getenv(name);
    const int is_missing = (value == NULL) || (value[0] == '\0');
    if (is_missing) {
        return default_on;
    }
    if (value[0] == '0') {
        return 0;
    }
    return 1;
}
// Report whether the guard is switched on. HAKMEM_LIBM_RELOC_GUARD is read on
// the first call (default: on) and the answer is cached in a function-local
// static for later calls.
int libm_reloc_guard_enabled(void) {
    static int cached = -1;  // -1 means "environment not consulted yet"
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }
    cached = libm_reloc_env("HAKMEM_LIBM_RELOC_GUARD", 1);
    return cached;
}
// Report whether quiet mode was requested. HAKMEM_LIBM_RELOC_GUARD_QUIET is
// read on the first call (default: off) and the answer is cached afterwards.
static int libm_reloc_guard_quiet(void) {
    static int cached = -1;  // -1 means "environment not consulted yet"
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }
    cached = libm_reloc_env("HAKMEM_LIBM_RELOC_GUARD_QUIET", 0);
    return cached;
}
// Report whether patching is allowed once an unrelocated slot is detected.
// HAKMEM_LIBM_RELOC_PATCH is read on the first call (default: on) and the
// answer is cached afterwards.
static int libm_reloc_patch_enabled(void) {
    static int cached = -1;  // -1 means "environment not consulted yet"
    if (__builtin_expect(cached != -1, 1)) {
        return cached;
    }
    cached = libm_reloc_env("HAKMEM_LIBM_RELOC_PATCH", 1);
    return cached;
}
// dl_iterate_phdr() callback: re-apply every R_X86_64_RELATIVE entry of the
// DT_RELA table of the object whose load address equals ctx->base.
//
//   info - program-header description of the object being visited
//   size - unused
//   data - libm_reloc_ctx_t*; `base` selects the object, `patched` is set to 1
//          once the relocation table has been walked
//
// Returns 0 to continue with the next object, or 1 to stop the iteration
// (target object processed, or found but unusable). When the object is
// unusable `patched` is left at 0 so the caller can report the failure.
static int libm_relocate_cb(struct dl_phdr_info* info, size_t size, void* data) {
    (void)size;
    libm_reloc_ctx_t* ctx = (libm_reloc_ctx_t*)data;
    if ((uintptr_t)info->dlpi_addr != ctx->base) {
        return 0;  // not the object we were asked to patch
    }

    // Collect the RELA table description and the RELRO range.
    ElfW(Addr) rela_ptr = 0;
    ElfW(Xword) rela_sz = 0;
    ElfW(Xword) rela_ent = sizeof(ElfW(Rela));
    uintptr_t relro_start = 0;
    size_t relro_size = 0;
    for (ElfW(Half) i = 0; i < info->dlpi_phnum; i++) {
        const ElfW(Phdr)* ph = &info->dlpi_phdr[i];
        if (ph->p_type == PT_DYNAMIC) {
            const ElfW(Dyn)* dyn = (const ElfW(Dyn)*)(info->dlpi_addr + ph->p_vaddr);
            for (; dyn->d_tag != DT_NULL; ++dyn) {
                switch (dyn->d_tag) {
                    case DT_RELA: rela_ptr = dyn->d_un.d_ptr; break;
                    case DT_RELASZ: rela_sz = dyn->d_un.d_val; break;
                    case DT_RELAENT: rela_ent = dyn->d_un.d_val; break;
                    default: break;
                }
            }
        } else if (ph->p_type == PT_GNU_RELRO) {
            relro_start = info->dlpi_addr + ph->p_vaddr;
            relro_size = ph->p_memsz;
        }
    }
    if (rela_ptr == 0 || rela_sz == 0) {
        return 1;  // no RELA table: nothing to apply
    }
    if (rela_ent < sizeof(ElfW(Rela))) {
        return 1;  // entry size too small to hold a RELA record
    }

    // sysconf() reports failure as -1; casting that to size_t would produce a
    // bogus alignment mask.
    const long page_raw = sysconf(_SC_PAGESIZE);
    if (page_raw <= 0) {
        return 1;
    }
    const size_t page_sz = (size_t)page_raw;

    // Make the RELRO range writable for the duration of the patch. If this
    // fails, the stores below would fault, so give up instead.
    uintptr_t start = 0;
    size_t len = 0;
    if (relro_size) {
        start = relro_start & ~((uintptr_t)page_sz - 1);
        const size_t tail = (size_t)(relro_start - start) + relro_size;
        len = (tail + page_sz - 1) & ~(page_sz - 1);
        if (mprotect((void*)start, len, PROT_READ | PROT_WRITE) != 0) {
            return 1;
        }
    }

    // The in-memory DT_RELA value may be a file-relative offset or, if the
    // loader rewrote the dynamic section in place, already an absolute
    // address. A value at or above the load base is taken as absolute.
    const uintptr_t table = ((uintptr_t)rela_ptr >= ctx->base)
                                ? (uintptr_t)rela_ptr
                                : ctx->base + (uintptr_t)rela_ptr;
    const size_t stride = (size_t)rela_ent;
    const size_t count = (size_t)(rela_sz / rela_ent);
    for (size_t i = 0; i < count; i++) {
        const ElfW(Rela)* entry = (const ElfW(Rela)*)(table + i * stride);
        if (ELF64_R_TYPE(entry->r_info) != R_X86_64_RELATIVE) {
            continue;
        }
        ElfW(Addr)* slot = (ElfW(Addr)*)(ctx->base + entry->r_offset);
        *slot = ctx->base + (ElfW(Addr))entry->r_addend;
    }

    if (len) {
        // Best effort: restore read-only protection on the RELRO range.
        (void)mprotect((void*)start, len, PROT_READ);
    }
    ctx->patched = 1;
    return 1;
}
static int libm_reloc_apply(uintptr_t base) {
libm_reloc_ctx_t ctx = {.base = base, .patched = 0};
dl_iterate_phdr(libm_relocate_cb, &ctx);
return ctx.patched;
}
// Lookup request/result for locating the first .fini_array slot of an object.
typedef struct {
    uintptr_t base;       // in:  load address of the object to inspect
    uintptr_t fini_slot;  // out: address of .fini_array[0], 0 when not found
} libm_fini_lookup_t;

// dl_iterate_phdr() callback: find DT_FINI_ARRAY of the object loaded at
// lk->base and store the address of its first entry in lk->fini_slot. The
// address is only accepted when it lies inside one of the object's PT_LOAD
// segments, so the caller can dereference it safely.
// Returns 0 to keep iterating, 1 once the target object has been examined.
static int libm_fini_lookup_cb(struct dl_phdr_info* info, size_t size, void* data) {
    (void)size;
    libm_fini_lookup_t* lk = (libm_fini_lookup_t*)data;
    if ((uintptr_t)info->dlpi_addr != lk->base) {
        return 0;
    }

    ElfW(Addr) array_ptr = 0;
    ElfW(Xword) array_bytes = 0;
    for (ElfW(Half) i = 0; i < info->dlpi_phnum; i++) {
        const ElfW(Phdr)* ph = &info->dlpi_phdr[i];
        if (ph->p_type != PT_DYNAMIC) {
            continue;
        }
        const ElfW(Dyn)* dyn = (const ElfW(Dyn)*)(info->dlpi_addr + ph->p_vaddr);
        for (; dyn->d_tag != DT_NULL; ++dyn) {
            switch (dyn->d_tag) {
                case DT_FINI_ARRAY: array_ptr = dyn->d_un.d_ptr; break;
                case DT_FINI_ARRAYSZ: array_bytes = dyn->d_un.d_val; break;
                default: break;
            }
        }
    }
    if (array_ptr == 0 || array_bytes < sizeof(uintptr_t)) {
        return 1;  // object has no (non-empty) .fini_array
    }

    // Accept both an offset and an already-absolute address.
    const uintptr_t addr = ((uintptr_t)array_ptr >= lk->base)
                               ? (uintptr_t)array_ptr
                               : lk->base + (uintptr_t)array_ptr;

    for (ElfW(Half) i = 0; i < info->dlpi_phnum; i++) {
        const ElfW(Phdr)* ph = &info->dlpi_phdr[i];
        if (ph->p_type != PT_LOAD) {
            continue;
        }
        const uintptr_t seg_lo = (uintptr_t)info->dlpi_addr + (uintptr_t)ph->p_vaddr;
        const uintptr_t seg_hi = seg_lo + (uintptr_t)ph->p_memsz;
        if (addr >= seg_lo && addr + sizeof(uintptr_t) <= seg_hi) {
            lk->fini_slot = addr;
            break;
        }
    }
    return 1;
}

// Check once per process whether libm's first .fini_array entry looks
// relocated and, if it does not, optionally re-apply libm's relative
// relocations.
//
// Steps:
//   1. Return immediately when the guard is disabled or has already run.
//   2. Resolve libm's load address through dladdr() on `cos`.
//   3. Locate .fini_array[0] from the object's dynamic section (rather than
//      from a build-specific fixed offset) and read it.
//   4. Treat the slot as relocated when its value is >= the load address.
//   5. Otherwise patch via libm_reloc_apply() unless patching is disabled.
// Each outcome is reported through hak_log_once_fprintf() with the quiet flag.
void libm_reloc_guard_run(void) {
    if (!libm_reloc_guard_enabled()) {
        return;
    }
    // Only the first caller proceeds; the exchange returns the previous value.
    if (atomic_exchange_explicit(&g_libm_guard_ran, 1, memory_order_relaxed)) {
        return;
    }
    bool quiet = libm_reloc_guard_quiet() != 0;
    Dl_info di = {0};
    if (dladdr((void*)&cos, &di) == 0 || di.dli_fbase == NULL) {
        hak_log_once_fprintf(&g_libm_fail_once, quiet, stderr, "[LIBM_RELOC_GUARD] dladdr(libm) failed\n");
        return;
    }
    const uintptr_t base = (uintptr_t)di.dli_fbase;

    libm_fini_lookup_t lookup = {.base = base, .fini_slot = 0};
    dl_iterate_phdr(libm_fini_lookup_cb, &lookup);
    if (lookup.fini_slot == 0) {
        hak_log_once_fprintf(&g_libm_fail_once,
                             quiet,
                             stderr,
                             "[LIBM_RELOC_GUARD] .fini_array not found in libm (base=%p)\n",
                             (void*)di.dli_fbase);
        return;
    }

    uintptr_t* fini_slot = (uintptr_t*)lookup.fini_slot;
    uintptr_t raw = *fini_slot;
    // A relocated entry is an absolute address inside the object, hence at or
    // above the load address; an unrelocated one is still a small offset.
    bool relocated = raw >= base;
    hak_log_once_fprintf(&g_libm_log_once,
                         quiet,
                         stderr,
                         "[LIBM_RELOC_GUARD] base=%p slot=%p raw=%p relocated=%d\n",
                         (void*)di.dli_fbase,
                         (void*)fini_slot,
                         (void*)raw,
                         relocated ? 1 : 0);
    if (relocated) {
        return;
    }
    if (!libm_reloc_patch_enabled()) {
        hak_log_once_fprintf(&g_libm_patch_once,
                             quiet,
                             stderr,
                             "[LIBM_RELOC_GUARD] unrelocated .fini_array detected (raw=%p); patch disabled\n",
                             (void*)raw);
        return;
    }
    int patched = libm_reloc_apply(base);
    if (patched) {
        hak_log_once_fprintf(&g_libm_patch_once,
                             quiet,
                             stderr,
                             "[LIBM_RELOC_GUARD] relocated libm .rela.dyn (base=%p)\n",
                             (void*)di.dli_fbase);
    } else {
        hak_log_once_fprintf(&g_libm_fail_once,
                             quiet,
                             stderr,
                             "[LIBM_RELOC_GUARD] failed to relocate libm (base=%p)\n",
                             (void*)di.dli_fbase);
    }
}
#else // non-linux/x86_64
// Stub for unsupported platforms: the guard is always reported as disabled.
int libm_reloc_guard_enabled(void) {
    return 0;
}

// Stub for unsupported platforms: nothing to check or patch.
void libm_reloc_guard_run(void) {
}
#endif