Optimize mincore() with TLS page cache (Phase A optimization)
Problem:
- SEGV fix (696aa7c0b) added 1,591 mincore() syscalls (11.0% time)
- Performance regression: 9.38M → 563K ops/s (-94%)
Solution: TLS page cache for last-checked pages
- Cache s_last_page1/s_last_page2 → is_mapped (2 slots)
- Expected hit rate: 90-95% (temporal locality)
- Fallback: mincore() syscall on cache miss
Implementation:
- Fast path: if (page1 == s_last_page1) → reuse cached result
- Boundary handling: Check both pages if AllocHeader crosses a page boundary
- Thread-safe: __thread static variables (no locks)
Expected Impact:
- mincore() calls: 1,591 → ~100-150 (a 90-94% reduction)
- Throughput: 563K → 647K ops/s (+15% estimated)
Next: Task B-1 SuperSlab LRU/Prewarm investigation
This commit is contained in:
@ -194,22 +194,52 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
||||
|
||||
// CRITICAL FIX (2025-11-14): Use real mincore() to check memory accessibility
|
||||
// Phase 9 gutted hak_is_memory_readable() to always return 1 (unsafe!)
|
||||
// We MUST verify memory is mapped before dereferencing AllocHeader
|
||||
// We MUST verify memory is mapped before dereferencing AllocHeader.
|
||||
//
|
||||
// Step A (2025-11-14): TLS page cache to reduce mincore() frequency.
|
||||
// - Cache last-checked pages in __thread statics.
|
||||
// - Typical case: many frees on the same handful of pages → 90%+ cache hit.
|
||||
int is_mapped = 0;
|
||||
#ifdef __linux__
|
||||
#ifdef __linux__
|
||||
{
|
||||
// TLS cache for page→is_mapped
|
||||
static __thread void* s_last_page1 = NULL;
|
||||
static __thread int s_last_page1_mapped = 0;
|
||||
static __thread void* s_last_page2 = NULL;
|
||||
static __thread int s_last_page2_mapped = 0;
|
||||
|
||||
unsigned char vec;
|
||||
// Check both pages if header crosses page boundary
|
||||
void* page1 = (void*)((uintptr_t)raw & ~0xFFFUL);
|
||||
void* page2 = (void*)(((uintptr_t)raw + sizeof(AllocHeader) - 1) & ~0xFFFUL);
|
||||
is_mapped = (mincore(page1, 1, &vec) == 0);
|
||||
|
||||
// Fast path: reuse cached result for page1 when possible
|
||||
if (page1 == s_last_page1) {
|
||||
is_mapped = s_last_page1_mapped;
|
||||
} else {
|
||||
is_mapped = (mincore(page1, 1, &vec) == 0);
|
||||
s_last_page1 = page1;
|
||||
s_last_page1_mapped = is_mapped;
|
||||
}
|
||||
|
||||
// If header crosses page boundary, ensure second page is also mapped
|
||||
if (is_mapped && page2 != page1) {
|
||||
is_mapped = (mincore(page2, 1, &vec) == 0);
|
||||
if (page2 == s_last_page2) {
|
||||
if (!s_last_page2_mapped) {
|
||||
is_mapped = 0;
|
||||
}
|
||||
} else {
|
||||
int mapped2 = (mincore(page2, 1, &vec) == 0);
|
||||
s_last_page2 = page2;
|
||||
s_last_page2_mapped = mapped2;
|
||||
if (!mapped2) {
|
||||
is_mapped = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
#else
|
||||
is_mapped = 1; // Assume mapped on non-Linux
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (!is_mapped) {
|
||||
// Memory not accessible, ptr likely has no header
|
||||
|
||||
Reference in New Issue
Block a user