From ce372cfc7ed1f8e17dfd012bd8708af33ec3e660 Mon Sep 17 00:00:00 2001 From: "Moe Charm (CI)" Date: Fri, 12 Dec 2025 00:16:32 +0900 Subject: [PATCH] =?UTF-8?q?Phase=20V6-HDR-4:=20Headerless=20=E6=9C=80?= =?UTF-8?q?=E9=81=A9=E5=8C=96=20(P0=20+=20P1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## P0: Double validation 排除 - region_id_lookup_v6() で TLS segment 登録済み + 範囲内なら small_page_meta_v6_of() を呼ばずに直接 page_meta を計算 - 削除された重複チェック: - slot->in_use (TLS登録で保証) - small_ptr_in_segment_v6() (addr範囲で既にチェック済み) - 関数呼び出しオーバーヘッド - 推定効果: +1-2% (6-8 instructions 削減) ## P1: TLS cache に page_meta キャッシュ追加 - RegionIdTlsCache に追加: - last_page_base / last_page_end (ページ範囲) - last_page (SmallPageMetaV6* 直接ポインタ) - region_id_lookup_cached_v6() で same-page hit 時は page_meta lookup を完全スキップ - 推定効果: +1.5-2.5% (10-12 instructions 削減) ## ベンチマーク結果 (揺れあり) - V6-HDR-3 (P0/P1 前): -3.5% ~ -8.3% 回帰 - V6-HDR-4 (P0+P1 後): +2.7% ~ +12% 改善 (一部の run で) 設計原則: - RegionIdBox は薄く保つ (分類のみ) - キャッシュは TLS 側に寄せる - same-page 判定で last_page_base/end を使用 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- core/box/region_id_v6_box.h | 8 +++- core/region_id_v6.c | 74 +++++++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 22 deletions(-) diff --git a/core/box/region_id_v6_box.h b/core/box/region_id_v6_box.h index 90f21ff6..1753f5da 100644 --- a/core/box/region_id_v6_box.h +++ b/core/box/region_id_v6_box.h @@ -122,9 +122,13 @@ void region_id_observe_unregister(uint32_t id); // ============================================================================ /// TLS cache for fast lookup (single entry) +/// Phase V6-HDR-4 P1: page_meta キャッシュ追加 typedef struct RegionIdTlsCache { - uintptr_t last_base; // Cached region base - uintptr_t last_end; // Cached region end + uintptr_t last_base; // Cached segment base + uintptr_t last_end; // Cached segment end + uintptr_t last_page_base; // Cached page base (for same-page optimization) + uintptr_t last_page_end; // Cached page end + SmallPageMetaV6* last_page; // Cached page_meta (direct pointer) RegionLookupV6 last_result; // Cached result } RegionIdTlsCache; diff --git a/core/region_id_v6.c b/core/region_id_v6.c index cdc0ce6c..11f7eebd 100644 --- a/core/region_id_v6.c +++ b/core/region_id_v6.c @@ -103,20 +103,37 @@ RegionLookupV6 region_id_lookup_v6(void* ptr) { uintptr_t addr = (uintptr_t)ptr; - // Phase V6-HDR-3: Check TLS segment registration first (fast path) + // Phase V6-HDR-4 P0: Direct page_meta calculation (Double validation 排除) + // TLS segment が登録済みかつ範囲内なら、small_page_meta_v6_of() を呼ばずに + // 直接 page_meta を計算する。これにより: + // - slot->in_use チェックの重複を排除 + // - small_ptr_in_segment_v6() の重複を排除 + // - 関数呼び出しオーバーヘッドを削減 if (g_v6_segment_registered && addr >= g_v6_segment_base && addr < g_v6_segment_end) { - // ptr is in this thread's v6 segment - get page_meta - SmallPageMetaV6* page = small_page_meta_v6_of(ptr); - if (page != NULL) { - result.kind = REGION_KIND_SMALL_V6; - result.region_id = 1; // Single TLS segment per thread - result.page_meta = page; + // Calculate page_idx directly using TLS cached base + size_t page_idx = (addr - g_v6_segment_base) >> SMALL_PAGE_V6_SHIFT; - // Update TLS cache - RegionIdTlsCache* cache = region_id_tls_cache_get(); - cache->last_base = g_v6_segment_base; - cache->last_end = g_v6_segment_end; - cache->last_result = result; + // Bounds check (should always pass if segment is valid) + if (likely(page_idx < SMALL_PAGES_PER_SEGMENT)) { + SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx]; + + // Minimal validation: only check capacity (page is in use) + if (likely(page->capacity > 0)) { + result.kind = REGION_KIND_SMALL_V6; + result.region_id = 1; + result.page_meta = page; + + // Phase V6-HDR-4 P1: Update TLS cache with page-level info + RegionIdTlsCache* cache = region_id_tls_cache_get(); + cache->last_base = g_v6_segment_base; + cache->last_end = g_v6_segment_end; + // Page-level cache: same-page ptr は page_meta lookup をスキップ可能 + uintptr_t page_base = g_v6_segment_base + (page_idx << SMALL_PAGE_V6_SHIFT); + cache->last_page_base = page_base; + cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE; + cache->last_page = page; + cache->last_result = result; + } } return result; } @@ -144,16 +161,33 @@ RegionLookupV6 region_id_lookup_cached_v6(void* ptr) { RegionIdTlsCache* cache = region_id_tls_cache_get(); uintptr_t addr = (uintptr_t)ptr; - // TLS cache hit? + // Phase V6-HDR-4 P1: Same-page hit optimization + // 同一ページ内なら page_meta lookup を完全にスキップ + if (addr >= cache->last_page_base && addr < cache->last_page_end && + cache->last_page != NULL && cache->last_page->capacity > 0) { + // Same page hit - return cached page_meta directly + RegionLookupV6 result = cache->last_result; + result.page_meta = cache->last_page; + return result; + } + + // Segment hit but different page - need to recalculate page_meta if (addr >= cache->last_base && addr < cache->last_end && cache->last_result.kind != REGION_KIND_UNKNOWN) { - // Cache hit - but still need to get the correct page_meta - // for this specific ptr (page_idx may differ) - SmallPageMetaV6* page = small_page_meta_v6_of(ptr); - if (page) { - RegionLookupV6 result = cache->last_result; - result.page_meta = page; - return result; + // Calculate new page_idx + size_t page_idx = (addr - cache->last_base) >> SMALL_PAGE_V6_SHIFT; + if (likely(page_idx < SMALL_PAGES_PER_SEGMENT && g_v6_segment != NULL)) { + SmallPageMetaV6* page = &g_v6_segment->page_meta[page_idx]; + if (likely(page->capacity > 0)) { + RegionLookupV6 result = cache->last_result; + result.page_meta = page; + // Update page-level cache + uintptr_t page_base = cache->last_base + (page_idx << SMALL_PAGE_V6_SHIFT); + cache->last_page_base = page_base; + cache->last_page_end = page_base + SMALL_PAGE_V6_SIZE; + cache->last_page = page; + return result; + } } }