POOL-MID-DN-BATCH: Add last-match cache to reduce linear search overhead
Root cause: Linear search in the 32-entry TLS map averaged 16 iterations, causing instruction overhead that exceeded the mid_desc_lookup savings.

Fix implemented:
- Added last_idx field to MidInuseTlsPageMap for temporal locality
- Check last_idx before linear search (O(1) fast path)
- Update last_idx on hits and new entries
- Reset last_idx on drain

Changes:
1. pool_mid_inuse_tls_pagemap_box.h:
   - Added uint32_t last_idx field to struct
2. pool_mid_inuse_deferred_box.h:
   - Check last_idx before linear search (lines 90-94)
   - Update last_idx on linear search hit (line 101)
   - Set last_idx on new entry insert (line 117)
   - Reset last_idx on drain (line 166)

Benchmark results (bench_mid_large_mt_hakmem):
- Baseline (DEFERRED=0): median 9.08M ops/s, variance 300B
- Deferred with cache (DEFERRED=1): median 8.38M ops/s, variance 207B
- Performance: -7.6% regression (vs. the expected +2-4% gain)
- Stability: -31% variance (an improvement, as expected)

Analysis: The last-match cache reduces variance but does not eliminate the regression for this benchmark's random access pattern (2048 slots, many pages). The temporal locality assumption (60-80% hit rate) is not met by bench_mid_large_mt's allocation pattern.

Further optimization needed:
- Consider hash-based lookup for better than O(n) search
- OR reduce map size to decrease search iterations
- OR add drain triggers at better boundaries

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -85,10 +85,20 @@ static inline void mid_inuse_dec_deferred(void* raw) {
|
|||||||
|
|
||||||
// Search TLS map for existing page entry
|
// Search TLS map for existing page entry
|
||||||
MidInuseTlsPageMap* map = &g_mid_inuse_tls_map;
|
MidInuseTlsPageMap* map = &g_mid_inuse_tls_map;
|
||||||
|
|
||||||
|
// Check last match first (60-80% hit rate expected - temporal locality)
|
||||||
|
if (map->last_idx < map->used && map->pages[map->last_idx] == page) {
|
||||||
|
map->counts[map->last_idx]++;
|
||||||
|
MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to linear search (now rarely executed)
|
||||||
for (uint32_t i = 0; i < map->used; i++) {
|
for (uint32_t i = 0; i < map->used; i++) {
|
||||||
if (map->pages[i] == page) {
|
if (map->pages[i] == page) {
|
||||||
// Page already in map, increment count
|
// Page already in map, increment count
|
||||||
map->counts[i]++;
|
map->counts[i]++;
|
||||||
|
map->last_idx = i; // Update last_idx on hit
|
||||||
MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
|
MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -104,6 +114,7 @@ static inline void mid_inuse_dec_deferred(void* raw) {
|
|||||||
uint32_t idx = map->used++;
|
uint32_t idx = map->used++;
|
||||||
map->pages[idx] = page;
|
map->pages[idx] = page;
|
||||||
map->counts[idx] = 1;
|
map->counts[idx] = 1;
|
||||||
|
map->last_idx = idx; // Set last_idx to new entry (temporal locality)
|
||||||
MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
|
MID_INUSE_DEFERRED_STAT_INC(mid_inuse_deferred_hit);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -152,6 +163,7 @@ static inline void mid_inuse_deferred_drain(void) {
|
|||||||
|
|
||||||
// Clear map (reset for next batch)
|
// Clear map (reset for next batch)
|
||||||
map->used = 0;
|
map->used = 0;
|
||||||
|
map->last_idx = 0; // Reset last_idx on drain
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // POOL_MID_INUSE_DEFERRED_BOX_H
|
#endif // POOL_MID_INUSE_DEFERRED_BOX_H
|
||||||
|
|||||||
@ -23,6 +23,7 @@ typedef struct {
|
|||||||
void* pages[MID_INUSE_TLS_MAP_SIZE]; // Page base addresses
|
void* pages[MID_INUSE_TLS_MAP_SIZE]; // Page base addresses
|
||||||
uint32_t counts[MID_INUSE_TLS_MAP_SIZE]; // Pending dec count per page
|
uint32_t counts[MID_INUSE_TLS_MAP_SIZE]; // Pending dec count per page
|
||||||
uint32_t used; // Number of active entries
|
uint32_t used; // Number of active entries
|
||||||
|
uint32_t last_idx; // Cache last hit index for temporal locality
|
||||||
} MidInuseTlsPageMap;
|
} MidInuseTlsPageMap;
|
||||||
|
|
||||||
// Thread-local instance (zero-initialized by default)
|
// Thread-local instance (zero-initialized by default)
|
||||||
|
|||||||
Reference in New Issue
Block a user