feat(perf): add Phase 21.8 foundation for IntArrayCore/MatI64 numeric boxes
Prepare infrastructure for specialized numeric array benchmarking: - Add IntArrayCore plugin stub (crates/nyash_kernel/src/plugin/intarray.rs) - Add IntArrayCore/MatI64 box definitions (lang/src/runtime/numeric/) - Add Phase 21.8 documentation and task tracking - Update nyash.toml/hako.toml with numeric library configuration - Extend microbench.sh for matmul_core benchmark case Next: Resolve Stage-B MirBuilder to recognize MatI64/IntArrayCore as boxes 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -1,4 +1,23 @@
|
|||||||
# Current Task — Phase 21.7(Normalization & Unification: Methodize Static Boxes)
|
# Current Task — Phase 21.8(Numeric Core Integration & Builder Support)
|
||||||
|
|
||||||
|
Update (2025-11-14 — 21.8 kickoff: MatI64/IntArrayCore builder integration)
|
||||||
|
- Context:
|
||||||
|
- 21.5: AotPrep/CollectionsHot v1 + microbench整備まで完了(linidx/maplin ≒ C=100%)。arraymap/matmul は次フェーズ送り。
|
||||||
|
- 21.6: NyRT IntArrayCore + Hako IntArrayCore/MatI64/matmul_core スケルトン実装まで完了(builder 経路未対応)。
|
||||||
|
- Current goal:
|
||||||
|
- Hakorune selfhost chain(Stage‑B → MirBuilder → ny‑llvmc(crate))に IntArrayCore/MatI64 を統合し、`matmul_core` ベンチを EXE ラインで実行できるようにする。
|
||||||
|
- 実装は Claude Code 担当、このホストは仕様・構造・診断の整理に専念。
|
||||||
|
|
||||||
|
Planned tasks (for Claude Code)
|
||||||
|
1) Fix MatI64 visibility in Stage‑B / MirBuilder
|
||||||
|
- Reproduce provider error: `[mirbuilder/parse/error] undefined variable: MatI64` from `env.mirbuilder.emit` when compiling a small test using `using nyash.core.numeric.matrix_i64 as MatI64`.
|
||||||
|
- Wire `nyash.core.numeric.matrix_i64` / `nyash.core.numeric.intarray` modules into the resolver/prelude so that Stage‑B/MirBuilder can see MatI64 and IntArrayCore like other core boxes.
|
||||||
|
2) Make `tools/hakorune_emit_mir.sh` emit MIR(JSON) for `matmul_core`
|
||||||
|
- With `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1`, emit MIR(JSON) for the `matmul_core` case in microbench and ensure no undefined‑variable errors.
|
||||||
|
3) Finish `matmul_core` bench EXE path
|
||||||
|
- Confirm `tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64` builds EXE and runs, logging ratio vs the matching C implementation.
|
||||||
|
4) Keep defaults stable
|
||||||
|
- No behaviour changes for existing code/benches; IntArrayCore/MatI64 integration is additive and behind explicit use.
|
||||||
|
|
||||||
Update (2025-11-14 — CollectionsHot rewrite expansion, waiting for Claude Code)
|
Update (2025-11-14 — CollectionsHot rewrite expansion, waiting for Claude Code)
|
||||||
- Status: pending (waiting on Claude Code to land rewrite coverage improvements)
|
- Status: pending (waiting on Claude Code to land rewrite coverage improvements)
|
||||||
|
|||||||
159
crates/nyash_kernel/src/plugin/intarray.rs
Normal file
159
crates/nyash_kernel/src/plugin/intarray.rs
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
// IntArrayCore helpers for AOT/VM bridge (handle-based, ring1 numeric core)
|
||||||
|
// API (Hako-facing via externcall):
|
||||||
|
// - nyash.intarray.new_h(len) -> handle (IntArrayCore)
|
||||||
|
// - nyash.intarray.len_h(h) -> i64
|
||||||
|
// - nyash.intarray.get_hi(h,i) -> i64
|
||||||
|
// - nyash.intarray.set_hii(h,i,v) -> i64 (0=ok, non-zero=error)
|
||||||
|
|
||||||
|
use nyash_rust::{
|
||||||
|
box_trait::{BoxCore, NyashBox, StringBox},
|
||||||
|
boxes::basic::BoolBox,
|
||||||
|
runtime::host_handles as handles,
|
||||||
|
};
|
||||||
|
use std::any::Any;
|
||||||
|
use std::sync::RwLock;
|
||||||
|
|
||||||
|
/// Minimal numeric core: contiguous i64 buffer + length.
|
||||||
|
/// This box is intended for internal numeric kernels (matmul_core 等) 専用で、
|
||||||
|
/// 一般APIは .hako 側のラッパー(MatI64 等)から利用する。
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct IntArrayCore {
|
||||||
|
base: nyash_rust::box_trait::BoxBase,
|
||||||
|
data: RwLock<Vec<i64>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntArrayCore {
|
||||||
|
pub fn new(len: i64) -> Self {
|
||||||
|
let n = if len <= 0 { 0 } else { len as usize };
|
||||||
|
IntArrayCore {
|
||||||
|
base: nyash_rust::box_trait::BoxBase::new(),
|
||||||
|
data: RwLock::new(vec![0; n]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len_i64(&self) -> i64 {
|
||||||
|
self.data.read().unwrap().len() as i64
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_i64(&self, idx: i64) -> Option<i64> {
|
||||||
|
if idx < 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let i = idx as usize;
|
||||||
|
let guard = self.data.read().unwrap();
|
||||||
|
guard.get(i).copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_i64(&self, idx: i64, v: i64) -> bool {
|
||||||
|
if idx < 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let i = idx as usize;
|
||||||
|
let mut guard = self.data.write().unwrap();
|
||||||
|
if i >= guard.len() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
guard[i] = v;
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BoxCore for IntArrayCore {
|
||||||
|
fn box_id(&self) -> u64 {
|
||||||
|
self.base.id
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parent_type_id(&self) -> Option<std::any::TypeId> {
|
||||||
|
self.base.parent_type_id
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fmt_box(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
write!(f, "IntArrayCore(len={})", self.data.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any(&self) -> &dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_any_mut(&mut self) -> &mut dyn Any {
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NyashBox for IntArrayCore {
|
||||||
|
fn to_string_box(&self) -> StringBox {
|
||||||
|
StringBox::new(&format!("IntArrayCore(len={})", self.data.len()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn equals(&self, other: &dyn NyashBox) -> BoolBox {
|
||||||
|
if let Some(o) = other.as_any().downcast_ref::<IntArrayCore>() {
|
||||||
|
BoolBox::new(self.data == o.data)
|
||||||
|
} else {
|
||||||
|
BoolBox::new(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clone_box(&self) -> Box<dyn NyashBox> {
|
||||||
|
Box::new(IntArrayCore {
|
||||||
|
base: self.base.clone(),
|
||||||
|
data: RwLock::new(self.data.read().unwrap().clone()),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn share_box(&self) -> Box<dyn NyashBox> {
|
||||||
|
// Identity semantics are not required here; clone is fine.
|
||||||
|
self.clone_box()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Extern API (handle-based) ---
|
||||||
|
|
||||||
|
fn get_core(handle: i64) -> Option<std::sync::Arc<dyn NyashBox>> {
|
||||||
|
if handle <= 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
handles::get(handle as u64)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[export_name = \"nyash.intarray.new_h\"]
|
||||||
|
pub extern \"C\" fn nyash_intarray_new_h(len: i64) -> i64 {
|
||||||
|
let core = IntArrayCore::new(len);
|
||||||
|
let arc: std::sync::Arc<dyn NyashBox> = std::sync::Arc::new(core);
|
||||||
|
let h = handles::to_handle_arc(arc) as i64;
|
||||||
|
if std::env::var(\"NYASH_CLI_VERBOSE\").ok().as_deref() == Some(\"1\") {
|
||||||
|
eprintln!(\"[INTARRAY] new_h(len={}) -> handle={}\", len, h);
|
||||||
|
}
|
||||||
|
h
|
||||||
|
}
|
||||||
|
|
||||||
|
#[export_name = \"nyash.intarray.len_h\"]
|
||||||
|
pub extern \"C\" fn nyash_intarray_len_h(handle: i64) -> i64 {
|
||||||
|
if let Some(obj) = get_core(handle) {
|
||||||
|
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
|
||||||
|
return core.len_i64();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[export_name = \"nyash.intarray.get_hi\"]
|
||||||
|
pub extern \"C\" fn nyash_intarray_get_hi(handle: i64, idx: i64) -> i64 {
|
||||||
|
if let Some(obj) = get_core(handle) {
|
||||||
|
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
|
||||||
|
if let Some(v) = core.get_i64(idx) {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
0
|
||||||
|
}
|
||||||
|
|
||||||
|
#[export_name = \"nyash.intarray.set_hii\"]
|
||||||
|
pub extern \"C\" fn nyash_intarray_set_hii(handle: i64, idx: i64, val: i64) -> i64 {
|
||||||
|
if let Some(obj) = get_core(handle) {
|
||||||
|
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
|
||||||
|
return if core.set_i64(idx, val) { 0 } else { 1 };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
1
|
||||||
|
}
|
||||||
@ -6,6 +6,7 @@ pub mod instance;
|
|||||||
pub mod invoke;
|
pub mod invoke;
|
||||||
pub mod invoke_core;
|
pub mod invoke_core;
|
||||||
pub mod map;
|
pub mod map;
|
||||||
|
pub mod intarray;
|
||||||
pub mod semantics;
|
pub mod semantics;
|
||||||
pub mod string;
|
pub mod string;
|
||||||
|
|
||||||
@ -17,5 +18,6 @@ pub use instance::*;
|
|||||||
pub use invoke::*;
|
pub use invoke::*;
|
||||||
pub use invoke_core::*;
|
pub use invoke_core::*;
|
||||||
pub use map::*;
|
pub use map::*;
|
||||||
|
pub use intarray::*;
|
||||||
pub use semantics::*;
|
pub use semantics::*;
|
||||||
pub use string::*;
|
pub use string::*;
|
||||||
|
|||||||
@ -4,7 +4,7 @@
|
|||||||
- .hako 側(AotPrep)で前処理最適化(構造のみ)を行い、LLVM/AOT に渡すIRを軽量にする。
|
- .hako 側(AotPrep)で前処理最適化(構造のみ)を行い、LLVM/AOT に渡すIRを軽量にする。
|
||||||
- 既定は挙動不変(opt‑in)。Return 純化ガードで安全性を担保。
|
- 既定は挙動不変(opt‑in)。Return 純化ガードで安全性を担保。
|
||||||
|
|
||||||
チェックリスト
|
チェックリスト(21.5 時点の着地)
|
||||||
- [x] パス分割(StrlenFold / LoopHoist / ConstDedup / CollectionsHot / BinopCSE)
|
- [x] パス分割(StrlenFold / LoopHoist / ConstDedup / CollectionsHot / BinopCSE)
|
||||||
- [x] CollectionsHot(Array/Map)導入(既定OFF)
|
- [x] CollectionsHot(Array/Map)導入(既定OFF)
|
||||||
- [x] Map key モード `NYASH_AOT_MAP_KEY_MODE={h|i64|hh|auto}`
|
- [x] Map key モード `NYASH_AOT_MAP_KEY_MODE={h|i64|hh|auto}`
|
||||||
@ -17,6 +17,11 @@
|
|||||||
- [ ] Idempotence(置換済みタグで再実行時も不変)
|
- [ ] Idempotence(置換済みタグで再実行時も不変)
|
||||||
- [ ] `arraymap`/`matmul` ≤ 125%(C基準)
|
- [ ] `arraymap`/`matmul` ≤ 125%(C基準)
|
||||||
|
|
||||||
|
メモ(21.5 クロージング)
|
||||||
|
- linidx/maplin など「線形インデックス+Array/Map」系は CollectionsHot + hoist/CSE で C≒100% 近辺まで到達。
|
||||||
|
- arraymap は Array/Map 部分の externcall 化は進んだものの、文字列キー生成(toString/`"k"+idx`)と hash パスが支配的なため、C の単純 int[] とは根本的に前提が異なる状態で終了。
|
||||||
|
- matmul は CollectionsHot 自体は単体では効いているが、行列積そのものが ArrayBox ベースであり、Core 数値箱不在のまま 80% 目標には届かず。これは 21.6 以降の「Core 数値箱+行列箱」導入で扱う。
|
||||||
|
|
||||||
トグル
|
トグル
|
||||||
- `NYASH_MIR_LOOP_HOIST=1` … StrlenFold/LoopHoist/ConstDedup/BinopCSE を有効化
|
- `NYASH_MIR_LOOP_HOIST=1` … StrlenFold/LoopHoist/ConstDedup/BinopCSE を有効化
|
||||||
- `NYASH_AOT_COLLECTIONS_HOT=1` … CollectionsHot(Array/Map)
|
- `NYASH_AOT_COLLECTIONS_HOT=1` … CollectionsHot(Array/Map)
|
||||||
|
|||||||
@ -0,0 +1,90 @@
|
|||||||
|
# Phase 21.6 — Core Numeric Boxes (Draft)
|
||||||
|
|
||||||
|
Status: proposal (to refine at 21.6 kickoff)
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Provide explicit, low‑level numeric boxes that:
|
||||||
|
|
||||||
|
- Give Nyash a “fair” core for int/f64 benchmarks against C.
|
||||||
|
- Stay compatible with the existing ArrayBox API (no breaking changes).
|
||||||
|
- Can be used both explicitly in `.hako` and (later) as conservative AotPrep targets.
|
||||||
|
|
||||||
|
This phase focuses on design + minimal implementation; aggressive auto‑rewrites stay behind opt‑in flags.
|
||||||
|
|
||||||
|
## Scope (21.6)
|
||||||
|
|
||||||
|
- Design and add **IntArrayCore** numeric core (NyRT + Hako wrapper):
|
||||||
|
- NyRT: `IntArrayCore` box(Rust)with internal layout `Vec<i64>`(contiguous, row‑major semantics)。
|
||||||
|
- Hako: `IntArrayCoreBox` in `nyash.core.numeric.intarray`, wrapping NyRT via externcall:
|
||||||
|
- `static new(len: i64) -> IntArrayCoreBox` → `nyash.intarray.new_h`
|
||||||
|
- `length(self) -> i64` → `nyash.intarray.len_h`
|
||||||
|
- `get_unchecked(self, idx: i64) -> i64` → `nyash.intarray.get_hi`
|
||||||
|
- `set_unchecked(self, idx: i64, v: i64)` → `nyash.intarray.set_hii`
|
||||||
|
- Semantics: i64‑only、固定長(構造変更なし)。境界チェックは NyRT 側(Fail‑Fast)に限定し、Hako 側は数値カーネル専用の薄いラッパーに留める。
|
||||||
|
|
||||||
|
- Design and add **MatI64** (matrix box) on top of IntArrayCore:
|
||||||
|
- Internal layout: `rows: i64`, `cols: i64`, `stride: i64`, `core: IntArrayCoreBox`.
|
||||||
|
- Minimal API:
|
||||||
|
- `new(rows: i64, cols: i64) -> MatI64`
|
||||||
|
- `rows(self) -> i64`, `cols(self) -> i64`
|
||||||
|
- `at(self, r: i64, c: i64) -> i64`
|
||||||
|
- `set(self, r: i64, c: i64, v: i64)`
|
||||||
|
- Provide one reference implementation:
|
||||||
|
- `MatOps.matmul_naive(a: MatI64, b: MatI64) -> MatI64` (O(n³), clear structure, not tuned).
|
||||||
|
|
||||||
|
- Bench alignment:
|
||||||
|
- Add `matmul_core` benchmark:
|
||||||
|
- Nyash: MatI64 + IntArrayCore implementation.
|
||||||
|
- C: struct `{ int64_t *ptr; int64_t rows; int64_t cols; int64_t stride; }` + helper `get/set`.
|
||||||
|
- Keep existing `matmul` (ArrayBox vs raw `int*`) as “language‑level” benchmark.
|
||||||
|
|
||||||
|
Out of scope for 21.6:
|
||||||
|
|
||||||
|
- Auto‑rewrite from `ArrayBox` → `IntArrayCore` / `MatI64` in AotPrep (only sketched, not default).
|
||||||
|
- SIMD / blocked matmul / cache‑tuned kernels (can be separate optimization phases).
|
||||||
|
- f64/complex variants (only type skeletons, if any).
|
||||||
|
|
||||||
|
## Design Notes
|
||||||
|
|
||||||
|
- **Layering**
|
||||||
|
- Core: IntArrayCore (and future F64ArrayCore) are “muscle” boxes: minimal, numeric‑only. NyRT では IntArrayCore(Rust)、Hako では IntArrayCoreBox として露出。
|
||||||
|
- Matrix: MatI64 expresses 2D shape and indexing; it owns an IntArrayCoreBox.
|
||||||
|
- High‑level: ArrayBox / MapBox / existing user APIs remain unchanged.
|
||||||
|
|
||||||
|
- **Hako ABI vs Nyash implementation**
|
||||||
|
- IntArrayCore lives as a NyRT box (C/Rust implementation) exposed via Hako ABI (`nyash.intarray.*`).
|
||||||
|
- IntArrayCoreBox, MatI64 and MatOps are written in Nyash, calling IntArrayCore via externcall while exposing boxcall APIs to user code.
|
||||||
|
- This keeps heavy lifting in NyRT while keeping the 2D semantics in `.hako`.
|
||||||
|
|
||||||
|
- **Fair C comparison**
|
||||||
|
- For `matmul_core`, C should mirror IntArrayCore/MatI64:
|
||||||
|
- Same struct layout (ptr + len / rows + cols + stride).
|
||||||
|
- Same naive O(n³) algorithm.
|
||||||
|
- This separates:
|
||||||
|
- “Nyash vs C as languages” → existing `matmul` (ArrayBox vs `int*`).
|
||||||
|
- “Core numeric kernel parity” → new `matmul_core` (IntArrayCore vs equivalent C).
|
||||||
|
|
||||||
|
## AotPrep / Future Work (21.6+)
|
||||||
|
|
||||||
|
Not for default in 21.6, but to keep in mind:
|
||||||
|
|
||||||
|
- Add conservative patterns in Collections/AotPrep to detect:
|
||||||
|
- `ArrayBox<i64>` with:
|
||||||
|
- Fixed length.
|
||||||
|
- No structural mutations after initialization.
|
||||||
|
- Access patterns of the form `base + i*cols + j` (or similar linear forms).
|
||||||
|
- Allow opt‑in rewrite from such patterns to IntArrayCore/MatI64 calls.
|
||||||
|
|
||||||
|
- Keep all auto‑rewrites:
|
||||||
|
- Behind env toggles (e.g. `NYASH_AOT_INTARRAY_CORE=1`).
|
||||||
|
- Semantics‑preserving by construction; fall back to ArrayBox path when unsure.
|
||||||
|
|
||||||
|
## Open Questions for 21.6 Kickoff
|
||||||
|
|
||||||
|
- Exact module names:
|
||||||
|
- `nyash.core.intarray` / `nyash.core.matrix` vs `nyash.linalg.*`.
|
||||||
|
- Bounds checking policy for IntArrayCore:
|
||||||
|
- Always on (fail‑fast) vs dev toggle for light checks in hot loops.
|
||||||
|
- Interop:
|
||||||
|
- Whether MatI64 should expose its IntArrayCore (e.g. `as_core_row_major()`) for advanced users.
|
||||||
85
docs/development/roadmap/phases/phase-21.8/README.md
Normal file
85
docs/development/roadmap/phases/phase-21.8/README.md
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
# Phase 21.8 — Numeric Core Integration & Builder Support
|
||||||
|
|
||||||
|
Status: proposal (to hand off to Claude Code)
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Integrate the new numeric core boxes (IntArrayCore + MatI64) into the Hakorune selfhost chain so that:
|
||||||
|
|
||||||
|
- Stage‑B → MirBuilder → ny‑llvmc(crate) can emit MIR(JSON) and EXE for code that uses:
|
||||||
|
- `using nyash.core.numeric.intarray as IntArrayCore`
|
||||||
|
- `using nyash.core.numeric.matrix_i64 as MatI64`
|
||||||
|
- The `matmul_core` microbench (MatI64 + IntArrayCore) runs end‑to‑end in EXE mode and can be compared fairly against a matching C implementation.
|
||||||
|
|
||||||
|
21.6 provides the core boxes; 21.8 focuses on wiring them into the builder/runtime chain without changing default behaviour for other code.
|
||||||
|
|
||||||
|
## Scope (21.8, this host)
|
||||||
|
|
||||||
|
- Stage‑B / MirBuilder:
|
||||||
|
- Ensure `MatI64` and `IntArrayCore` are recognized as valid boxes when referenced via:
|
||||||
|
- `using nyash.core.numeric.matrix_i64 as MatI64`
|
||||||
|
- `using nyash.core.numeric.intarray as IntArrayCore`
|
||||||
|
- Fix the current provider‑emit failure:
|
||||||
|
- Error today: `[mirbuilder/parse/error] undefined variable: MatI64` during `env.mirbuilder.emit`.
|
||||||
|
- Diagnose and adjust Stage‑B / MirBuilder so that static box references (`MatI64.new`, `A.mul_naive`) compile in the same way as other boxes.
|
||||||
|
|
||||||
|
- AotPrep / emit pipeline:
|
||||||
|
- Keep AotPrep unchanged for now; the goal is to make `tools/hakorune_emit_mir.sh` succeed on `matmul_core` sources without special‑casing.
|
||||||
|
- Ensure `tools/hakorune_emit_mir.sh` with:
|
||||||
|
- `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1`
|
||||||
|
- can emit valid MIR(JSON) for MatI64/IntArrayCore code.
|
||||||
|
|
||||||
|
- Microbench integration:
|
||||||
|
- Finish wiring `matmul_core` in `tools/perf/microbench.sh`:
|
||||||
|
- Hako side: MatI64/IntArrayCore based O(n³) matmul (`MatI64.mul_naive`).
|
||||||
|
- C side: `MatI64Core { int64_t *ptr; rows; cols; stride; }` with identical algorithm.
|
||||||
|
- Accept that performance may still be far from the 80% target; 21.8 focuses on **structural integration and parity**, not tuning.
|
||||||
|
|
||||||
|
Out of scope:
|
||||||
|
|
||||||
|
- New optimizations inside AotPrep / CollectionsHot.
|
||||||
|
- SIMD/blocked matmul kernels (to be handled in a later optimization phase).
|
||||||
|
- f64/complex matrix variants.
|
||||||
|
|
||||||
|
## Tasks for implementation (Claude Code)
|
||||||
|
|
||||||
|
1) **Fix MatI64 visibility in Stage‑B / MirBuilder**
|
||||||
|
- Reproduce the current failure:
|
||||||
|
- Use a small `.hako` like:
|
||||||
|
- `using nyash.core.numeric.matrix_i64 as MatI64`
|
||||||
|
- `static box Main { method main(args) { local n = 4; local A = MatI64.new(n,n); return A.at(0,0); } }`
|
||||||
|
- Confirm `env.mirbuilder.emit` reports `undefined variable: MatI64`.
|
||||||
|
- Investigate how modules from `nyash.toml` (`"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako"`) are made visible to Stage‑B and MirBuilder.
|
||||||
|
- Adjust the resolver / module prelude so that `MatI64` (and `IntArrayCore`) are treated like other core boxes:
|
||||||
|
- Either via explicit prelude inclusion,
|
||||||
|
- Or via module registry entries consumed by the builder.
|
||||||
|
|
||||||
|
2) **Ensure `tools/hakorune_emit_mir.sh` can emit MIR(JSON) for matmul_core**
|
||||||
|
- Once MatI64 is visible, run:
|
||||||
|
- `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1 tools/hakorune_emit_mir.sh <matmul_core.hako> tmp/matmul_core.json`
|
||||||
|
- Acceptance:
|
||||||
|
- No `undefined variable: MatI64` / `IntArrayCore` errors.
|
||||||
|
- `tmp/matmul_core.json` is valid MIR(JSON) (same schema as existing matmul case).
|
||||||
|
|
||||||
|
3) **Finish `matmul_core` microbench**
|
||||||
|
- Use the existing skeleton in `tools/perf/microbench.sh` (`case matmul_core`):
|
||||||
|
- Confirm Hako side compiles and runs under `--backend vm`.
|
||||||
|
- Confirm EXE path works:
|
||||||
|
- `NYASH_SKIP_TOML_ENV=1 NYASH_LLVM_SKIP_BUILD=1 tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64`
|
||||||
|
- Update `benchmarks/README.md`:
|
||||||
|
- Add `matmul_core` row with a short description:
|
||||||
|
- “MatI64/IntArrayCore vs MatI64Core C struct (ptr+rows+cols+stride)”
|
||||||
|
- Record initial ratios (even if far from 80%).
|
||||||
|
|
||||||
|
4) **Keep existing behaviour stable**
|
||||||
|
- No changes to default user behaviour, env toggles, or existing benches beyond adding `matmul_core`.
|
||||||
|
- Ensure quick/profile smokes (where applicable) remain green with numeric core present.
|
||||||
|
|
||||||
|
## Notes
|
||||||
|
|
||||||
|
- 21.6 already introduced:
|
||||||
|
- NyRT `IntArrayCore` (Vec<i64> + RwLock) and handle‑based externs (`nyash.intarray.*`).
|
||||||
|
- Hako wrappers `IntArrayCore` and `MatI64` in `lang/src/runtime/numeric/`.
|
||||||
|
- `nyash.toml` module aliases for `nyash.core.numeric.intarray` and `nyash.core.numeric.matrix_i64`.
|
||||||
|
- 21.8 is about wiring these into the builder/emit chain so that Hakorune can compile and benchmark numeric core code end‑to‑end.
|
||||||
|
|
||||||
4
hako.toml
Normal file
4
hako.toml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
[env]
|
||||||
|
# This file mirrors nyash.toml for Hakorune/Hako tools.
|
||||||
|
# Primary runtime config remains nyash.toml; hako.toml exists as a dev-facing alias.
|
||||||
|
|
||||||
28
lang/src/runtime/numeric/intarray_core_box.hako
Normal file
28
lang/src/runtime/numeric/intarray_core_box.hako
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
// IntArrayCoreBox — thin Hako wrapper over NyRT IntArrayCore (handle-based)
|
||||||
|
// This provides a box-level API for numeric kernels while delegating storage
|
||||||
|
// to nyash.intarray.* extern calls in NyRT (ring1).
|
||||||
|
|
||||||
|
static box IntArrayCore {
  init { handle, len }

  // Allocate a NyRT-side integer array and wrap its handle in a box.
  // `len` is stored exactly as requested by the caller; the authoritative
  // element count is whatever length() reports from NyRT.
  static new(len) {
    local raw = externcall "nyash.intarray.new_h"(len)
    local inst = new IntArrayCore()
    inst.handle = raw
    inst.len = len
    return inst
  }

  // Element count as reported by NyRT for this handle.
  length() {
    return externcall "nyash.intarray.len_h"(me.handle)
  }

  // Read the element at idx via NyRT. No checks are made on the Hako side.
  get_unchecked(idx) {
    return externcall "nyash.intarray.get_hi"(me.handle, idx)
  }

  // Write v at idx via NyRT. The status code returned by the extern call
  // is discarded; this method always returns null.
  set_unchecked(idx, v) {
    externcall "nyash.intarray.set_hii"(me.handle, idx, v)
    return null
  }
}
|
||||||
65
lang/src/runtime/numeric/mat_i64_box.hako
Normal file
65
lang/src/runtime/numeric/mat_i64_box.hako
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
// MatI64 — simple i64 matrix box built on top of IntArrayCore.
|
||||||
|
// Internal layout: rows, cols, stride, core (IntArrayCore).
|
||||||
|
|
||||||
|
using nyash.core.numeric.intarray as IntArrayCore
|
||||||
|
|
||||||
|
static box MatI64 {
  init { rows, cols, stride, core }

  // Build a rows x cols matrix backed by one IntArrayCore of rows*cols
  // elements. The stride (distance between row starts) equals cols.
  static new(rows, cols) {
    local count = rows * cols
    local buf = IntArrayCore.new(count)
    local mat = new MatI64()
    mat.rows = rows
    mat.cols = cols
    mat.stride = cols
    mat.core = buf
    return mat
  }

  // Number of rows.
  rowsCount() {
    return me.rows
  }

  // Number of columns.
  colsCount() {
    return me.cols
  }

  // Element at row r, column c, stored at linear position r*stride + c.
  at(r, c) {
    local pos = r * me.stride + c
    return me.core.get_unchecked(pos)
  }

  // Store v at row r, column c. Always returns null.
  set(r, c, v) {
    local pos = r * me.stride + c
    me.core.set_unchecked(pos, v)
    return null
  }

  // Naive O(n^3) matrix product: returns a new matrix holding this * b.
  // Shapes are not validated; b is assumed to have me.cols rows.
  // Loop order is i-k-j: each product is added into the output cell that
  // MatI64.new just allocated.
  mul_naive(b) {
    local arows = me.rows
    local inner = me.cols
    local bcols = b.cols
    local out = MatI64.new(arows, bcols)
    local i = 0
    loop(i < arows) {
      local k = 0
      loop(k < inner) {
        local lhs = me.at(i, k)
        local j = 0
        loop(j < bcols) {
          local pos = i * out.stride + j
          local acc = out.core.get_unchecked(pos) + lhs * b.at(k, j)
          out.core.set_unchecked(pos, acc)
          j = j + 1
        }
        k = k + 1
      }
      i = i + 1
    }
    return out
  }
}
|
||||||
|
|
||||||
@ -279,6 +279,10 @@ path = "lang/src/shared/common/string_helpers.hako"
|
|||||||
|
|
||||||
# Temporary alias keys removed (Phase‑20.33 TTL reached). Use `selfhost.shared.*` above.
|
# Temporary alias keys removed (Phase‑20.33 TTL reached). Use `selfhost.shared.*` above.
|
||||||
|
|
||||||
|
# Numeric core boxes (Phase 21.6)
|
||||||
|
"nyash.core.numeric.intarray" = "lang/src/runtime/numeric/intarray_core_box.hako"
|
||||||
|
"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako"
|
||||||
|
|
||||||
# v2 Plugin libraries (loader reads these for TypeBox ABI)
|
# v2 Plugin libraries (loader reads these for TypeBox ABI)
|
||||||
[libraries]
|
[libraries]
|
||||||
[libraries."libnyash_filebox_plugin.so"]
|
[libraries."libnyash_filebox_plugin.so"]
|
||||||
|
|||||||
@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|||||||
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||||
BIN="$ROOT/target/release/hakorune"
|
BIN="$ROOT/target/release/hakorune"
|
||||||
|
|
||||||
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; }
|
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|matmul_core|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; }
|
||||||
|
|
||||||
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0
|
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
@ -562,6 +562,91 @@ C
|
|||||||
rm -f "$TMP_CHECK_JSON" 2>/dev/null || true
|
rm -f "$TMP_CHECK_JSON" 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
matmul_core)
|
||||||
|
# Core numeric matmul using MatI64 + IntArrayCore
|
||||||
|
# Use smaller default N to keep runtime reasonable
|
||||||
|
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
|
||||||
|
N=256
|
||||||
|
fi
|
||||||
|
HAKO_FILE=$(mktemp_hako)
|
||||||
|
cat >"$HAKO_FILE" <<HAKO
|
||||||
|
using nyash.core.numeric.matrix_i64 as MatI64
|
||||||
|
|
||||||
|
static box Main { method main(args) {
|
||||||
|
local n = ${N}
|
||||||
|
// Initialize A, B, C as n x n matrices
|
||||||
|
local A = MatI64.new(n, n)
|
||||||
|
local B = MatI64.new(n, n)
|
||||||
|
local C = MatI64.new(n, n)
|
||||||
|
local i = 0
|
||||||
|
loop(i < n) {
|
||||||
|
local j = 0
|
||||||
|
loop(j < n) {
|
||||||
|
local idx = i*n + j
|
||||||
|
A.set(i, j, idx % 97)
|
||||||
|
B.set(i, j, (idx * 3) % 101)
|
||||||
|
C.set(i, j, 0)
|
||||||
|
j = j + 1
|
||||||
|
}
|
||||||
|
i = i + 1
|
||||||
|
}
|
||||||
|
// Naive matmul via MatI64.mul_naive
|
||||||
|
local out = A.mul_naive(B)
|
||||||
|
return out.at(n-1, n-1)
|
||||||
|
} }
|
||||||
|
HAKO
|
||||||
|
C_FILE=$(mktemp_c)
|
||||||
|
cat >"$C_FILE" <<'C'
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int64_t *ptr;
|
||||||
|
int64_t rows;
|
||||||
|
int64_t cols;
|
||||||
|
int64_t stride;
|
||||||
|
} MatI64Core;
|
||||||
|
|
||||||
|
static inline int64_t mat_get(MatI64Core *m, int64_t r, int64_t c) {
|
||||||
|
return m->ptr[r * m->stride + c];
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void mat_set(MatI64Core *m, int64_t r, int64_t c, int64_t v) {
|
||||||
|
m->ptr[r * m->stride + c] = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int64_t n = N_PLACEHOLDER;
|
||||||
|
int64_t total = n * n;
|
||||||
|
MatI64Core A, B, C;
|
||||||
|
A.rows = B.rows = C.rows = n;
|
||||||
|
A.cols = B.cols = C.cols = n;
|
||||||
|
A.stride = B.stride = C.stride = n;
|
||||||
|
A.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
|
||||||
|
B.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
|
||||||
|
C.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
|
||||||
|
for (int64_t idx = 0; idx < total; idx++) {
|
||||||
|
A.ptr[idx] = idx % 97;
|
||||||
|
B.ptr[idx] = (idx * 3) % 101;
|
||||||
|
C.ptr[idx] = 0;
|
||||||
|
}
|
||||||
|
for (int64_t i = 0; i < n; i++) {
|
||||||
|
for (int64_t k = 0; k < n; k++) {
|
||||||
|
int64_t aik = mat_get(&A, i, k);
|
||||||
|
for (int64_t j = 0; j < n; j++) {
|
||||||
|
int64_t idx = i * C.stride + j;
|
||||||
|
int64_t v = C.ptr[idx] + aik * mat_get(&B, k, j);
|
||||||
|
C.ptr[idx] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int64_t r = mat_get(&C, n-1, n-1);
|
||||||
|
free(A.ptr); free(B.ptr); free(C.ptr);
|
||||||
|
return (int)(r & 0xFF);
|
||||||
|
}
|
||||||
|
C
|
||||||
|
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
|
||||||
|
;;
|
||||||
linidx)
|
linidx)
|
||||||
# Linear index pattern: idx = i*cols + j
|
# Linear index pattern: idx = i*cols + j
|
||||||
# Derive rows/cols from N to keep runtime stable
|
# Derive rows/cols from N to keep runtime stable
|
||||||
|
|||||||
Reference in New Issue
Block a user