Optimize mincore() with TLS page cache (Phase A optimization)
Problem:
- SEGV fix (696aa7c0b) added 1,591 mincore() syscalls (11.0% of run time)
- Performance regression: 9.38M → 563K ops/s (-94%)
Solution: TLS page cache for last-checked pages
- Cache s_last_page1/page2 → is_mapped (2 slots)
- Expected hit rate: 90-95% (temporal locality)
- Fallback: mincore() syscall on cache miss
Implementation:
- Fast path: if (page1 == s_last_page1) → reuse cached result for page1
- Boundary handling: check both pages if AllocHeader crosses a page boundary
- Thread-safe: __thread static variables (no locks)
Expected Impact:
- mincore() calls: 1,591 → ~100-150 (-90-94%)
- Throughput: 563K → 647K ops/s (+15% estimated)
Next: Task B-1 SuperSlab LRU/Prewarm investigation
This commit is contained in:
@@ -194,22 +194,52 @@ void hak_free_at(void* ptr, size_t size, hak_callsite_t site) {
|
|||||||
|
|
||||||
// CRITICAL FIX (2025-11-14): Use real mincore() to check memory accessibility
|
// CRITICAL FIX (2025-11-14): Use real mincore() to check memory accessibility
|
||||||
// Phase 9 gutted hak_is_memory_readable() to always return 1 (unsafe!)
|
// Phase 9 gutted hak_is_memory_readable() to always return 1 (unsafe!)
|
||||||
// We MUST verify memory is mapped before dereferencing AllocHeader
|
// We MUST verify memory is mapped before dereferencing AllocHeader.
|
||||||
|
//
|
||||||
|
// Step A (2025-11-14): TLS page cache to reduce mincore() frequency.
|
||||||
|
// - Cache last-checked pages in __thread statics.
|
||||||
|
// - Typical case: many frees on the same handful of pages → 90%+ cache hit.
|
||||||
int is_mapped = 0;
|
int is_mapped = 0;
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
{
|
{
|
||||||
|
// TLS cache for page→is_mapped
|
||||||
|
static __thread void* s_last_page1 = NULL;
|
||||||
|
static __thread int s_last_page1_mapped = 0;
|
||||||
|
static __thread void* s_last_page2 = NULL;
|
||||||
|
static __thread int s_last_page2_mapped = 0;
|
||||||
|
|
||||||
unsigned char vec;
|
unsigned char vec;
|
||||||
// Check both pages if header crosses page boundary
|
|
||||||
void* page1 = (void*)((uintptr_t)raw & ~0xFFFUL);
|
void* page1 = (void*)((uintptr_t)raw & ~0xFFFUL);
|
||||||
void* page2 = (void*)(((uintptr_t)raw + sizeof(AllocHeader) - 1) & ~0xFFFUL);
|
void* page2 = (void*)(((uintptr_t)raw + sizeof(AllocHeader) - 1) & ~0xFFFUL);
|
||||||
|
|
||||||
|
// Fast path: reuse cached result for page1 when possible
|
||||||
|
if (page1 == s_last_page1) {
|
||||||
|
is_mapped = s_last_page1_mapped;
|
||||||
|
} else {
|
||||||
is_mapped = (mincore(page1, 1, &vec) == 0);
|
is_mapped = (mincore(page1, 1, &vec) == 0);
|
||||||
|
s_last_page1 = page1;
|
||||||
|
s_last_page1_mapped = is_mapped;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If header crosses page boundary, ensure second page is also mapped
|
||||||
if (is_mapped && page2 != page1) {
|
if (is_mapped && page2 != page1) {
|
||||||
is_mapped = (mincore(page2, 1, &vec) == 0);
|
if (page2 == s_last_page2) {
|
||||||
|
if (!s_last_page2_mapped) {
|
||||||
|
is_mapped = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int mapped2 = (mincore(page2, 1, &vec) == 0);
|
||||||
|
s_last_page2 = page2;
|
||||||
|
s_last_page2_mapped = mapped2;
|
||||||
|
if (!mapped2) {
|
||||||
|
is_mapped = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
is_mapped = 1; // Assume mapped on non-Linux
|
is_mapped = 1; // Assume mapped on non-Linux
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!is_mapped) {
|
if (!is_mapped) {
|
||||||
// Memory not accessible, ptr likely has no header
|
// Memory not accessible, ptr likely has no header
|
||||||
|
|||||||
Reference in New Issue
Block a user