240 lines
6.2 KiB
Markdown
240 lines
6.2 KiB
Markdown
|
|
# Phase 11.5b: Atomic操作最適化によるsync処理高速化
|
||
|
|
|
||
|
|
## 🎯 目標
|
||
|
|
Arc<Mutex>の重いロック操作を、可能な限り軽量なatomic操作に置き換えて性能を向上させる。
|
||
|
|
|
||
|
|
## 📊 現状の問題
|
||
|
|
|
||
|
|
### 現在の実装
|
||
|
|
```rust
|
||
|
|
// すべてのBox操作でMutexロック
|
||
|
|
/// Looks up the field called `name` and returns a clone of its boxed value,
/// or `None` when no such field exists.
///
/// Panics if the `fields` lock is poisoned.
pub fn get_field(&self, name: &str) -> Option<Box<dyn NyashBox>> {
    // Heavy: the lock is acquired even though this is only a read.
    self.fields.lock().unwrap().get(name).cloned()
}
|
||
|
|
|
||
|
|
// Read-onlyでも同じコスト
|
||
|
|
pub fn to_string(&self) -> String {
|
||
|
|
let value = self.value.lock().unwrap(); // 不要なロック!
|
||
|
|
value.clone()
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### パフォーマンス問題
|
||
|
|
- Read操作でも排他ロックのオーバーヘッド
|
||
|
|
- 複数スレッドでのcontention
|
||
|
|
- Cache line bouncing
|
||
|
|
|
||
|
|
## 🚀 実装計画
|
||
|
|
|
||
|
|
### Step 1: Read-only path分析
|
||
|
|
```rust
|
||
|
|
// mir/readonly_analysis.rs
|
||
|
|
pub struct ReadOnlyAnalysis {
|
||
|
|
// メソッドのread-only性
|
||
|
|
readonly_methods: HashMap<(TypeId, String), bool>,
|
||
|
|
// フィールドのimmutability
|
||
|
|
immutable_fields: HashMap<(TypeId, String), bool>,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl ReadOnlyAnalysis {
|
||
|
|
pub fn analyze_box_types(&mut self, registry: &BoxRegistry) {
|
||
|
|
// StringBox.length() -> read-only
|
||
|
|
self.mark_readonly("StringBox", "length");
|
||
|
|
self.mark_readonly("StringBox", "isEmpty");
|
||
|
|
|
||
|
|
// IntegerBox.value() -> read-only
|
||
|
|
self.mark_readonly("IntegerBox", "value");
|
||
|
|
|
||
|
|
// プラグインメソッドも解析
|
||
|
|
self.analyze_plugin_methods();
|
||
|
|
}
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 2: Atomic wrapper実装
|
||
|
|
```rust
|
||
|
|
// runtime/atomic_box.rs
|
||
|
|
pub struct AtomicBox<T: NyashBox> {
|
||
|
|
// Read-optimized RwLock
|
||
|
|
inner: Arc<RwLock<T>>,
|
||
|
|
// Atomic cache for immutable data
|
||
|
|
cached_string: AtomicPtr<String>,
|
||
|
|
cached_int: AtomicI64,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl<T: NyashBox> AtomicBox<T> {
|
||
|
|
/// Lock-free read for cached values
|
||
|
|
pub fn read_cached(&self) -> Option<Box<dyn NyashBox>> {
|
||
|
|
// Atomic loadでキャッシュチェック
|
||
|
|
let ptr = self.cached_string.load(Ordering::Acquire);
|
||
|
|
if !ptr.is_null() {
|
||
|
|
unsafe {
|
||
|
|
let s = &*ptr;
|
||
|
|
return Some(Box::new(StringBox::new(s.clone())));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
None
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Optimized read path
|
||
|
|
pub fn read_optimized<F, R>(&self, f: F) -> R
|
||
|
|
where
|
||
|
|
F: FnOnce(&T) -> R
|
||
|
|
{
|
||
|
|
// Try read lock first (non-blocking)
|
||
|
|
if let Ok(guard) = self.inner.try_read() {
|
||
|
|
return f(&*guard);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Fallback to regular read
|
||
|
|
let guard = self.inner.read().unwrap();
|
||
|
|
f(&*guard)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 3: JIT最適化
|
||
|
|
```rust
|
||
|
|
// jit/lower/atomic_opt.rs
|
||
|
|
impl<'a> LoweringBuilder<'a> {
|
||
|
|
fn emit_box_method_call(&mut self,
|
||
|
|
box_val: Value,
|
||
|
|
method: &str,
|
||
|
|
args: &[Value]
|
||
|
|
) -> Value {
|
||
|
|
// Read-only解析結果確認
|
||
|
|
if self.readonly_info.is_readonly_method(box_val, method) {
|
||
|
|
// Atomic fast path
|
||
|
|
self.emit_atomic_read_path(box_val, method, args)
|
||
|
|
} else {
|
||
|
|
// 通常のMutexパス
|
||
|
|
self.emit_mutex_path(box_val, method, args)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
fn emit_atomic_read_path(&mut self,
|
||
|
|
box_val: Value,
|
||
|
|
method: &str,
|
||
|
|
args: &[Value]
|
||
|
|
) -> Value {
|
||
|
|
// 1. Atomic loadでcacheチェック
|
||
|
|
let cache_ptr = self.emit_atomic_load(box_val, "cache");
|
||
|
|
|
||
|
|
// 2. Cache hit判定
|
||
|
|
let is_valid = self.emit_null_check(cache_ptr);
|
||
|
|
|
||
|
|
// 3. 条件分岐
|
||
|
|
let then_block = self.create_block();
|
||
|
|
let else_block = self.create_block();
|
||
|
|
self.emit_branch(is_valid, then_block, else_block);
|
||
|
|
|
||
|
|
// Cache hit: lock-free return
|
||
|
|
self.switch_to_block(then_block);
|
||
|
|
let cached = self.emit_load(cache_ptr);
|
||
|
|
self.emit_jump(merge_block);
|
||
|
|
|
||
|
|
// Cache miss: RwLock読み取り
|
||
|
|
self.switch_to_block(else_block);
|
||
|
|
let value = self.emit_rwlock_read(box_val, method);
|
||
|
|
self.emit_jump(merge_block);
|
||
|
|
|
||
|
|
// Merge
|
||
|
|
self.switch_to_block(merge_block);
|
||
|
|
self.emit_phi(vec![(cached, then_block), (value, else_block)])
|
||
|
|
}
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### Step 4: Memory ordering最適化
|
||
|
|
```rust
|
||
|
|
// プラットフォーム別最適化
|
||
|
|
#[cfg(target_arch = "x86_64")]
|
||
|
|
fn emit_memory_fence(&mut self, ordering: Ordering) {
|
||
|
|
match ordering {
|
||
|
|
Ordering::Relaxed => {}, // x86は強いメモリモデル
|
||
|
|
Ordering::Acquire => self.emit_lfence(),
|
||
|
|
Ordering::Release => self.emit_sfence(),
|
||
|
|
Ordering::SeqCst => self.emit_mfence(),
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Emits the barrier required for `ordering` on AArch64.
///
/// AArch64 has a weak memory model, so every ordering except `Relaxed`
/// needs a barrier.
///
/// NOTE(review): this assumes `emit_dmb_ld` emits a load barrier
/// (`dmb ishld`) and `emit_dmb` a full barrier (`dmb ish`); confirm
/// against the helpers.
#[cfg(target_arch = "aarch64")]
fn emit_memory_fence(&mut self, ordering: Ordering) {
    match ordering {
        Ordering::Relaxed => {}
        Ordering::Acquire => self.emit_dmb_ld(),
        // A release fence must order earlier loads as well as earlier stores
        // before later stores. A store-only barrier orders stores only, so
        // release, acquire-release and sequentially consistent fences all use
        // the full barrier. Any ordering added later also falls back to it.
        _ => self.emit_dmb(),
    }
}
|
||
|
|
```
|
||
|
|
|
||
|
|
## 📈 期待される効果
|
||
|
|
|
||
|
|
### ベンチマーク
|
||
|
|
```nyash
|
||
|
|
// Read-heavy workload
|
||
|
|
function sumStringLengths(strings) {
|
||
|
|
local total = 0
|
||
|
|
loop(s in strings) {
|
||
|
|
total = total + s.length() // Atomic最適化
|
||
|
|
}
|
||
|
|
return total
|
||
|
|
}
|
||
|
|
|
||
|
|
// 性能改善
|
||
|
|
// Before: 1000ms (Mutex contention)
|
||
|
|
// After: 100ms (Lock-free reads)
|
||
|
|
```
|
||
|
|
|
||
|
|
### 改善予測
|
||
|
|
- Read操作: 実行時間を約90%削減(約10倍高速化)
|
||
|
|
- Read/Write混在: 50%高速化
|
||
|
|
- マルチスレッド: スケーラビリティ大幅向上
|
||
|
|
|
||
|
|
## 🔍 検証方法
|
||
|
|
|
||
|
|
### 1. Lock統計
|
||
|
|
```json
|
||
|
|
{
|
||
|
|
"total_operations": 100000,
|
||
|
|
"mutex_locks": 10000,
|
||
|
|
"atomic_reads": 90000,
|
||
|
|
"lock_reduction": 0.9,
|
||
|
|
"contention_events": 50
|
||
|
|
}
|
||
|
|
```
|
||
|
|
|
||
|
|
### 2. プロファイリング
|
||
|
|
```bash
|
||
|
|
# Mutexプロファイル
|
||
|
|
NYASH_PROFILE_LOCKS=1 ./target/release/nyash bench.nyash
|
||
|
|
|
||
|
|
# Atomic最適化後
|
||
|
|
NYASH_ATOMIC_OPT=1 ./target/release/nyash bench.nyash
|
||
|
|
```
|
||
|
|
|
||
|
|
## 🚧 実装上の注意点
|
||
|
|
|
||
|
|
1. **正確性**
|
||
|
|
- Memory orderingの正しさ
|
||
|
|
- ABA問題の回避
|
||
|
|
- Weak pointer対応
|
||
|
|
|
||
|
|
2. **互換性**
|
||
|
|
- 既存APIの維持
|
||
|
|
- プラグインとの相互運用
|
||
|
|
|
||
|
|
3. **デバッグ性**
|
||
|
|
- Race condition検出
|
||
|
|
- Lock順序の追跡
|
||
|
|
|
||
|
|
## 🎉 完了基準
|
||
|
|
|
||
|
|
- [ ] Read-only分析実装
|
||
|
|
- [ ] AtomicBox wrapper実装
|
||
|
|
- [ ] JIT統合
|
||
|
|
- [ ] マルチスレッドベンチマーク
|
||
|
|
- [ ] プラットフォーム別最適化
|