Phase V6-HDR-4: Headerless 最適化 (P0 + P1)
## P0: Double validation 排除

- region_id_lookup_v6() で TLS segment 登録済み + 範囲内なら small_page_meta_v6_of() を呼ばずに直接 page_meta を計算
- 削除された重複チェック:
  - slot->in_use (TLS登録で保証)
  - small_ptr_in_segment_v6() (addr範囲で既にチェック済み)
  - 関数呼び出しオーバーヘッド
- 推定効果: +1-2% (6-8 instructions 削減)

## P1: TLS cache に page_meta キャッシュ追加

- RegionIdTlsCache に追加:
  - last_page_base / last_page_end (ページ範囲)
  - last_page (SmallPageMetaV6* 直接ポインタ)
- region_id_lookup_cached_v6() で same-page hit 時は page_meta lookup を完全スキップ
- 推定効果: +1.5-2.5% (10-12 instructions 削減)

## ベンチマーク結果 (揺れあり)

- V6-HDR-3 (P0/P1 前): -3.5% ~ -8.3% 回帰
- V6-HDR-4 (P0+P1 後): +2.7% ~ +12% 改善 (一部の run で)

設計原則:

- RegionIdBox は薄く保つ (分類のみ)
- キャッシュは TLS 側に寄せる
- same-page 判定で last_page_base/end を使用

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@ -122,9 +122,13 @@ void region_id_observe_unregister(uint32_t id);
|
|||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
||||||
/// TLS cache for fast lookup (single entry)
|
/// TLS cache for fast lookup (single entry)
|
||||||
|
/// Phase V6-HDR-4 P1: page_meta キャッシュ追加
|
||||||
typedef struct RegionIdTlsCache {
|
typedef struct RegionIdTlsCache {
|
||||||
uintptr_t last_base; // Cached region base
|
uintptr_t last_base; // Cached segment base
|
||||||
uintptr_t last_end; // Cached region end
|
uintptr_t last_end; // Cached segment end
|
||||||
|
uintptr_t last_page_base; // Cached page base (for same-page optimization)
|
||||||
|
uintptr_t last_page_end; // Cached page end
|
||||||
|
SmallPageMetaV6* last_page; // Cached page_meta (direct pointer)
|
||||||
RegionLookupV6 last_result; // Cached result
|
RegionLookupV6 last_result; // Cached result
|
||||||
} RegionIdTlsCache;
|
} RegionIdTlsCache;
|
||||||
|
|
||||||
|
|||||||
@ -103,20 +103,37 @@ RegionLookupV6 region_id_lookup_v6(void* ptr) {
|
|||||||
|
|
||||||
uintptr_t addr = (uintptr_t)ptr;
|
uintptr_t addr = (uintptr_t)ptr;
|
||||||
|
|
||||||
// Phase V6-HDR-3: Check TLS segment registration first (fast path)
|
// Phase V6-HDR-4 P0: Direct page_meta calculation (Double validation 排除)
|
||||||
|
// TLS segment が登録済みかつ範囲内なら、small_page_meta_v6_of() を呼ばずに
|
||||||
|
// 直接 page_meta を計算する。これにより:
|
||||||
|
// - slot->in_use チェックの重複を排除
|
||||||
|
// - small_ptr_in_segment_v6() の重複を排除
|
||||||
|
// - 関数呼び出しオーバーヘッドを削減
|
||||||
if (g_v6_segment_registered && addr >= g_v6_segment_base && addr < g_v6_segment_end) {
|
if (g_v6_segment_registered && addr >= g_v6_segment_base && addr < g_v6_segment_end) {
|
||||||
// ptr is in this thread's v6 segment - get page_meta
|
// Calculate page_idx directly using TLS cached base
|
||||||
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
|
size_t page_idx = (addr - g_v6_segment_base) >> SMALL_PAGE_V6_SHIFT;
|
||||||
if (page != NULL) {
|
|
||||||
result.kind = REGION_KIND_SMALL_V6;
|
|
||||||
result.region_id = 1; // Single TLS segment per thread
|
|
||||||
result.page_meta = page;
|
|
||||||
|
|
||||||
// Update TLS cache
|
// Bounds check (should always pass if segment is valid)
|
||||||
RegionIdTlsCache* cache = region_id_tls_cache_get();
|
if (likely(page_idx < SMALL_PAGES_PER_SEGMENT)) {
|
||||||
cache->last_base = g_v6_segment_base;
|
SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx];
|
||||||
cache->last_end = g_v6_segment_end;
|
|
||||||
cache->last_result = result;
|
// Minimal validation: only check capacity (page is in use)
|
||||||
|
if (likely(page->capacity > 0)) {
|
||||||
|
result.kind = REGION_KIND_SMALL_V6;
|
||||||
|
result.region_id = 1;
|
||||||
|
result.page_meta = page;
|
||||||
|
|
||||||
|
// Phase V6-HDR-4 P1: Update TLS cache with page-level info
|
||||||
|
RegionIdTlsCache* cache = region_id_tls_cache_get();
|
||||||
|
cache->last_base = g_v6_segment_base;
|
||||||
|
cache->last_end = g_v6_segment_end;
|
||||||
|
// Page-level cache: same-page ptr は page_meta lookup をスキップ可能
|
||||||
|
uintptr_t page_base = g_v6_segment_base + (page_idx << SMALL_PAGE_V6_SHIFT);
|
||||||
|
cache->last_page_base = page_base;
|
||||||
|
cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE;
|
||||||
|
cache->last_page = page;
|
||||||
|
cache->last_result = result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -144,16 +161,33 @@ RegionLookupV6 region_id_lookup_cached_v6(void* ptr) {
|
|||||||
RegionIdTlsCache* cache = region_id_tls_cache_get();
|
RegionIdTlsCache* cache = region_id_tls_cache_get();
|
||||||
uintptr_t addr = (uintptr_t)ptr;
|
uintptr_t addr = (uintptr_t)ptr;
|
||||||
|
|
||||||
// TLS cache hit?
|
// Phase V6-HDR-4 P1: Same-page hit optimization
|
||||||
|
// 同一ページ内なら page_meta lookup を完全にスキップ
|
||||||
|
if (addr >= cache->last_page_base && addr < cache->last_page_end &&
|
||||||
|
cache->last_page != NULL && cache->last_page->capacity > 0) {
|
||||||
|
// Same page hit - return cached page_meta directly
|
||||||
|
RegionLookupV6 result = cache->last_result;
|
||||||
|
result.page_meta = cache->last_page;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Segment hit but different page - need to recalculate page_meta
|
||||||
if (addr >= cache->last_base && addr < cache->last_end &&
|
if (addr >= cache->last_base && addr < cache->last_end &&
|
||||||
cache->last_result.kind != REGION_KIND_UNKNOWN) {
|
cache->last_result.kind != REGION_KIND_UNKNOWN) {
|
||||||
// Cache hit - but still need to get the correct page_meta
|
// Calculate new page_idx
|
||||||
// for this specific ptr (page_idx may differ)
|
size_t page_idx = (addr - cache->last_base) >> SMALL_PAGE_V6_SHIFT;
|
||||||
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
|
if (likely(page_idx < SMALL_PAGES_PER_SEGMENT && g_v6_segment != NULL)) {
|
||||||
if (page) {
|
SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx];
|
||||||
RegionLookupV6 result = cache->last_result;
|
if (likely(page->capacity > 0)) {
|
||||||
result.page_meta = page;
|
RegionLookupV6 result = cache->last_result;
|
||||||
return result;
|
result.page_meta = page;
|
||||||
|
// Update page-level cache
|
||||||
|
uintptr_t page_base = cache->last_base + (page_idx << SMALL_PAGE_V6_SHIFT);
|
||||||
|
cache->last_page_base = page_base;
|
||||||
|
cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE;
|
||||||
|
cache->last_page = page;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user