From c6a2a6d38abebad80c45219982fc1b64faed9574 Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Fri, 14 Nov 2025 06:32:38 +0900 Subject: [PATCH] Optimize mincore() with TLS page cache (Phase A optimization) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: - SEGV fix (696aa7c0b) added 1,591 mincore() syscalls (11.0% time) - Performance regression: 9.38M → 563K ops/s (-94%) Solution: TLS page cache for last-checked pages - Cache s_last_page1/page2 → is_mapped (2 slots) - Expected hit rate: 90-95% (temporal locality) - Fallback: mincore() syscall on cache miss Implementation: - Fast path: if (page == s_last_page1) → reuse cached result - Boundary handling: Check both pages if AllocHeader crosses page - Thread-safe: __thread static variables (no locks) Expected Impact: - mincore() calls: 1,591 → ~100-150 (-90-94%) - Throughput: 563K → 647K ops/s (+15% estimated) Next: Task B-1 SuperSlab LRU/Prewarm investigation --- core/box/hak_free_api.inc.h | 44 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/core/box/hak_free_api.inc.h b/core/box/hak_free_api.inc.h index a468af47..871792bf 100644 --- a/core/box/hak_free_api.inc.h +++ b/core/box/hak_free_api.inc.h @@ -194,22 +194,52 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) { // CRITICAL FIX (2025-11-14): Use real mincore() to check memory accessibility // Phase 9 gutted hak_is_memory_readable() to always return 1 (unsafe!) - // We MUST verify memory is mapped before dereferencing AllocHeader + // We MUST verify memory is mapped before dereferencing AllocHeader. + // + // Step A (2025-11-14): TLS page cache to reduce mincore() frequency. + // - Cache last-checked pages in __thread statics. + // - Typical case: many frees on the same handful of pages → 90%+ cache hit. int is_mapped = 0; - #ifdef __linux__ +#ifdef __linux__ { + // TLS cache for page→is_mapped + static __thread void* s_last_page1 = NULL; + static __thread int s_last_page1_mapped = 0; + static __thread void* s_last_page2 = NULL; + static __thread int s_last_page2_mapped = 0; + unsigned char vec; - // Check both pages if header crosses page boundary void* page1 = (void*)((uintptr_t)raw & ~0xFFFUL); void* page2 = (void*)(((uintptr_t)raw + sizeof(AllocHeader) - 1) & ~0xFFFUL); - is_mapped = (mincore(page1, 1, &vec) == 0); + + // Fast path: reuse cached result for page1 when possible + if (page1 == s_last_page1) { + is_mapped = s_last_page1_mapped; + } else { + is_mapped = (mincore(page1, 1, &vec) == 0); + s_last_page1 = page1; + s_last_page1_mapped = is_mapped; + } + + // If header crosses page boundary, ensure second page is also mapped if (is_mapped && page2 != page1) { - is_mapped = (mincore(page2, 1, &vec) == 0); + if (page2 == s_last_page2) { + if (!s_last_page2_mapped) { + is_mapped = 0; + } + } else { + int mapped2 = (mincore(page2, 1, &vec) == 0); + s_last_page2 = page2; + s_last_page2_mapped = mapped2; + if (!mapped2) { + is_mapped = 0; + } + } } } - #else +#else is_mapped = 1; // Assume mapped on non-Linux - #endif +#endif if (!is_mapped) { // Memory not accessible, ptr likely has no header