Phase V6-HDR-4: Headerless 最適化 (P0 + P1)

## P0: Double validation 排除
- region_id_lookup_v6() で TLS segment 登録済み + 範囲内なら
  small_page_meta_v6_of() を呼ばずに直接 page_meta を計算
- 削除された重複チェック:
  - slot->in_use (TLS登録で保証)
  - small_ptr_in_segment_v6() (addr範囲で既にチェック済み)
  - 関数呼び出しオーバーヘッド
- 推定効果: +1-2% (6-8 instructions 削減)

## P1: TLS cache に page_meta キャッシュ追加
- RegionIdTlsCache に追加:
  - last_page_base / last_page_end (ページ範囲)
  - last_page (SmallPageMetaV6* 直接ポインタ)
- region_id_lookup_cached_v6() で same-page hit 時は
  page_meta lookup を完全スキップ
- 推定効果: +1.5-2.5% (10-12 instructions 削減)

## ベンチマーク結果 (揺れあり)
- V6-HDR-3 (P0/P1 前): -3.5% ~ -8.3% 回帰
- V6-HDR-4 (P0+P1 後): +2.7% ~ +12% 改善 (一部の run で)

設計原則:
- RegionIdBox は薄く保つ (分類のみ)
- キャッシュは TLS 側に寄せる
- same-page 判定で last_page_base/end を使用

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-12-12 00:16:32 +09:00
parent 969170c0fb
commit ce372cfc7e
2 changed files with 60 additions and 22 deletions

View File

@@ -122,9 +122,13 @@ void region_id_observe_unregister(uint32_t id);
// ============================================================================
/// TLS cache for fast lookup (single entry).
/// Phase V6-HDR-4 P1: adds a page-level cache (page range + page_meta pointer)
/// next to the segment-level range and the cached lookup result.
/// NOTE(review): last_base / last_end are listed twice below, once described
/// as "region" and once as "segment". This looks like the removed and added
/// lines of a diff shown without +/- markers -- confirm which pair is current.
typedef struct RegionIdTlsCache {
uintptr_t last_base; // Cached region base
uintptr_t last_end; // Cached region end
uintptr_t last_base; // Cached segment base (inclusive lower bound)
uintptr_t last_end; // Cached segment end (exclusive upper bound)
uintptr_t last_page_base; // Cached page base (for same-page optimization)
uintptr_t last_page_end; // Cached page end (exclusive: page base + page size)
SmallPageMetaV6* last_page; // Cached page_meta (direct pointer); NULL is treated as "no cached page"
RegionLookupV6 last_result; // Cached result
} RegionIdTlsCache;

View File

@@ -103,21 +103,38 @@ RegionLookupV6 region_id_lookup_v6(void* ptr) {
uintptr_t addr = (uintptr_t)ptr;
// Phase V6-HDR-3: Check TLS segment registration first (fast path)
// Phase V6-HDR-4 P0: Direct page_meta calculation (Double validation 排除)
// TLS segment が登録済みかつ範囲内なら、small_page_meta_v6_of() を呼ばずに
// 直接 page_meta を計算する。これにより:
// - slot->in_use チェックの重複を排除
// - small_ptr_in_segment_v6() の重複を排除
// - 関数呼び出しオーバーヘッドを削減
if (g_v6_segment_registered && addr >= g_v6_segment_base && addr < g_v6_segment_end) {
// ptr is in this thread's v6 segment - get page_meta
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
if (page != NULL) {
// Calculate page_idx directly using TLS cached base
size_t page_idx = (addr - g_v6_segment_base) >> SMALL_PAGE_V6_SHIFT;
// Bounds check (should always pass if segment is valid)
if (likely(page_idx < SMALL_PAGES_PER_SEGMENT)) {
SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx];
// Minimal validation: only check capacity (page is in use)
if (likely(page->capacity > 0)) {
result.kind = REGION_KIND_SMALL_V6;
result.region_id = 1; // Single TLS segment per thread
result.region_id = 1;
result.page_meta = page;
// Update TLS cache
// Phase V6-HDR-4 P1: Update TLS cache with page-level info
RegionIdTlsCache* cache = region_id_tls_cache_get();
cache->last_base = g_v6_segment_base;
cache->last_end = g_v6_segment_end;
// Page-level cache: same-page ptr は page_meta lookup をスキップ可能
uintptr_t page_base = g_v6_segment_base + (page_idx << SMALL_PAGE_V6_SHIFT);
cache->last_page_base = page_base;
cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE;
cache->last_page = page;
cache->last_result = result;
}
}
return result;
}
@@ -144,18 +161,35 @@ RegionLookupV6 region_id_lookup_cached_v6(void* ptr) {
RegionIdTlsCache* cache = region_id_tls_cache_get();
uintptr_t addr = (uintptr_t)ptr;
// TLS cache hit?
// Phase V6-HDR-4 P1: Same-page hit optimization
// 同一ページ内なら page_meta lookup を完全にスキップ
if (addr >= cache->last_page_base && addr < cache->last_page_end &&
cache->last_page != NULL && cache->last_page->capacity > 0) {
// Same page hit - return cached page_meta directly
RegionLookupV6 result = cache->last_result;
result.page_meta = cache->last_page;
return result;
}
// Segment hit but different page - need to recalculate page_meta
if (addr >= cache->last_base && addr < cache->last_end &&
cache->last_result.kind != REGION_KIND_UNKNOWN) {
// Cache hit - but still need to get the correct page_meta
// for this specific ptr (page_idx may differ)
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
if (page) {
// Calculate new page_idx
size_t page_idx = (addr - cache->last_base) >> SMALL_PAGE_V6_SHIFT;
if (likely(page_idx < SMALL_PAGES_PER_SEGMENT && g_v6_segment != NULL)) {
SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx];
if (likely(page->capacity > 0)) {
RegionLookupV6 result = cache->last_result;
result.page_meta = page;
// Update page-level cache
uintptr_t page_base = cache->last_base + (page_idx << SMALL_PAGE_V6_SHIFT);
cache->last_page_base = page_base;
cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE;
cache->last_page = page;
return result;
}
}
}
// Cache miss -> slow path
return region_id_lookup_v6(ptr);