diff --git a/CURRENT_TASK.md b/CURRENT_TASK.md index fae7e8a1..b46a0e7b 100644 --- a/CURRENT_TASK.md +++ b/CURRENT_TASK.md @@ -1,4 +1,23 @@ -# Current Task — Phase 21.7(Normalization & Unification: Methodize Static Boxes) +# Current Task — Phase 21.8(Numeric Core Integration & Builder Support) + +Update (2025-11-14 — 21.8 kickoff: MatI64/IntArrayCore builder integration) +- Context: + - 21.5: AotPrep/CollectionsHot v1 + microbench整備まで完了(linidx/maplin ≒ C=100%)。arraymap/matmul は次フェーズ送り。 + - 21.6: NyRT IntArrayCore + Hako IntArrayCore/MatI64/matmul_core スケルトン実装まで完了(builder 経路未対応)。 +- Current goal: + - Hakorune selfhost chain(Stage‑B → MirBuilder → ny‑llvmc(crate))に IntArrayCore/MatI64 を統合し、`matmul_core` ベンチを EXE ラインで実行できるようにする。 + - 実装は Claude Code 担当、このホストは仕様・構造・診断の整理に専念。 + +Planned tasks (for Claude Code) +1) Fix MatI64 visibility in Stage‑B / MirBuilder + - Reproduce provider error: `[mirbuilder/parse/error] undefined variable: MatI64` from `env.mirbuilder.emit` when compiling a small test using `using nyash.core.numeric.matrix_i64 as MatI64`. + - Wire `nyash.core.numeric.matrix_i64` / `nyash.core.numeric.intarray` modules into the resolver/prelude so that Stage‑B/MirBuilder can see MatI64 and IntArrayCore like other core boxes. +2) Make `tools/hakorune_emit_mir.sh` emit MIR(JSON) for `matmul_core` + - With `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1`, emit MIR(JSON) for the `matmul_core` case in microbench and ensure no undefined‑variable errors. +3) Finish `matmul_core` bench EXE path + - Confirm `tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64` builds EXE and runs, logging ratio vs the matching C implementation. +4) Keep defaults stable + - No behaviour changes for existing code/benches; IntArrayCore/MatI64 integration is additive and behind explicit use. 
Update (2025-11-14 — CollectionsHot rewrite expansion, waiting for Claude Code) - Status: pending (waiting on Claude Code to land rewrite coverage improvements) diff --git a/crates/nyash_kernel/src/plugin/intarray.rs b/crates/nyash_kernel/src/plugin/intarray.rs new file mode 100644 index 00000000..4c535d1d --- /dev/null +++ b/crates/nyash_kernel/src/plugin/intarray.rs @@ -0,0 +1,174 @@
+// IntArrayCore helpers for AOT/VM bridge (handle-based, ring1 numeric core)
+// API (Hako-facing via externcall):
+// - nyash.intarray.new_h(len) -> handle (IntArrayCore)
+// - nyash.intarray.len_h(h) -> i64
+// - nyash.intarray.get_hi(h,i) -> i64
+// - nyash.intarray.set_hii(h,i,v) -> i64 (0=ok, non-zero=error)
+
+use nyash_rust::{
+    box_trait::{BoxCore, NyashBox, StringBox},
+    boxes::basic::BoolBox,
+    runtime::host_handles as handles,
+};
+use std::any::Any;
+use std::sync::RwLock;
+
+/// Minimal numeric core: contiguous i64 buffer + length.
+/// This box is intended exclusively for internal numeric kernels (matmul_core etc.);
+/// general-purpose APIs should go through the .hako-side wrappers (MatI64 etc.).
+#[derive(Debug)]
+pub struct IntArrayCore {
+    base: nyash_rust::box_trait::BoxBase,
+    /// Element storage; the lock lets `&self` methods update elements in place.
+    data: RwLock<Vec<i64>>,
+}
+
+impl IntArrayCore {
+    /// Creates a zero-filled buffer of `len` elements; `len <= 0` yields an empty buffer.
+    pub fn new(len: i64) -> Self {
+        let n = if len <= 0 { 0 } else { len as usize };
+        IntArrayCore {
+            base: nyash_rust::box_trait::BoxBase::new(),
+            data: RwLock::new(vec![0; n]),
+        }
+    }
+
+    /// Returns the current element count.
+    pub fn len_i64(&self) -> i64 {
+        self.data.read().unwrap().len() as i64
+    }
+
+    /// Returns the element at `idx`, or `None` when `idx` is negative or out of range.
+    pub fn get_i64(&self, idx: i64) -> Option<i64> {
+        if idx < 0 {
+            return None;
+        }
+        let i = idx as usize;
+        let guard = self.data.read().unwrap();
+        guard.get(i).copied()
+    }
+
+    /// Stores `v` at `idx`; returns `false` without writing when `idx` is negative or out of range.
+    pub fn set_i64(&self, idx: i64, v: i64) -> bool {
+        if idx < 0 {
+            return false;
+        }
+        let i = idx as usize;
+        let mut guard = self.data.write().unwrap();
+        if i >= guard.len() {
+            return false;
+        }
+        guard[i] = v;
+        true
+    }
+}
+
+impl BoxCore for IntArrayCore {
+    fn box_id(&self) -> u64 {
+        self.base.id
+    }
+
+    fn parent_type_id(&self) -> Option<std::any::TypeId> {
+        self.base.parent_type_id
+    }
+
+    fn fmt_box(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // `RwLock` has no `len()`; read the length through the lock via `len_i64`.
+        write!(f, "IntArrayCore(len={})", self.len_i64())
+    }
+
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+}
+
+impl NyashBox for IntArrayCore {
+    fn to_string_box(&self) -> StringBox {
+        StringBox::new(&format!("IntArrayCore(len={})", self.len_i64()))
+    }
+
+    fn equals(&self, other: &dyn NyashBox) -> BoolBox {
+        if let Some(o) = other.as_any().downcast_ref::<IntArrayCore>() {
+            // `RwLock` is not `PartialEq`, so compare the guarded element vectors.
+            // The pointer check avoids locking the same `RwLock` twice for `x.equals(x)`.
+            let same = std::ptr::eq(self, o)
+                || *self.data.read().unwrap() == *o.data.read().unwrap();
+            BoolBox::new(same)
+        } else {
+            BoolBox::new(false)
+        }
+    }
+
+    fn clone_box(&self) -> Box<dyn NyashBox> {
+        Box::new(IntArrayCore {
+            base: self.base.clone(),
+            data: RwLock::new(self.data.read().unwrap().clone()),
+        })
+    }
+
+    fn share_box(&self) -> Box<dyn NyashBox> {
+        // Identity semantics are not required here; clone is fine.
+        self.clone_box()
+    }
+}
+
+// --- Extern API (handle-based) ---
+
+/// Looks `handle` up in the host handle table; non-positive handles yield `None`.
+fn get_core(handle: i64) -> Option<std::sync::Arc<dyn NyashBox>> {
+    if handle <= 0 {
+        return None;
+    }
+    handles::get(handle as u64)
+}
+
+/// Extern `nyash.intarray.new_h`: allocates a buffer and returns its host handle (logs when `NYASH_CLI_VERBOSE=1`).
+#[export_name = "nyash.intarray.new_h"]
+pub extern "C" fn nyash_intarray_new_h(len: i64) -> i64 {
+    let core = IntArrayCore::new(len);
+    let arc: std::sync::Arc<dyn NyashBox> = std::sync::Arc::new(core);
+    let h = handles::to_handle_arc(arc) as i64;
+    if std::env::var("NYASH_CLI_VERBOSE").ok().as_deref() == Some("1") {
+        eprintln!("[INTARRAY] new_h(len={}) -> handle={}", len, h);
+    }
+    h
+}
+
+/// Extern `nyash.intarray.len_h`: element count, or 0 when `handle` does not resolve to an `IntArrayCore`.
+#[export_name = "nyash.intarray.len_h"]
+pub extern "C" fn nyash_intarray_len_h(handle: i64) -> i64 {
+    if let Some(obj) = get_core(handle) {
+        if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
+            return core.len_i64();
+        }
+    }
+    0
+}
+
+/// Extern `nyash.intarray.get_hi`: element at `idx`; 0 when the handle or index is invalid (same as a stored 0).
+#[export_name = "nyash.intarray.get_hi"]
+pub extern "C" fn nyash_intarray_get_hi(handle: i64, idx: i64) -> i64 {
+    if let Some(obj) = get_core(handle) {
+        if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
+            if let Some(v) = core.get_i64(idx) {
+                return v;
+            }
+        }
+    }
+    0
+}
+
+/// Extern `nyash.intarray.set_hii`: returns 0 after a successful store, 1 when the handle or index is invalid.
+#[export_name = "nyash.intarray.set_hii"]
+pub extern "C" fn nyash_intarray_set_hii(handle: i64, idx: i64, val: i64) -> i64 {
+    if let Some(obj) = get_core(handle) {
+        if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
+            return if core.set_i64(idx, val) { 0 } else { 1 };
+        }
+    }
+    1
+}
diff --git a/crates/nyash_kernel/src/plugin/mod.rs b/crates/nyash_kernel/src/plugin/mod.rs index d382ed3e..12a405ab 100644 --- a/crates/nyash_kernel/src/plugin/mod.rs +++ b/crates/nyash_kernel/src/plugin/mod.rs @@ -6,6 +6,7 @@ pub mod instance; pub mod invoke; pub mod invoke_core; pub mod map; +pub mod intarray; pub mod semantics; pub mod string; @@ -17,5 +18,6 @@ pub use instance::*; pub use invoke::*; pub use invoke_core::*; pub use map::*; +pub use intarray::*; pub use semantics::*; pub use string::*; diff --git a/docs/development/roadmap/phases/phase-21.5-optimization/README.md b/docs/development/roadmap/phases/phase-21.5-optimization/README.md index 928892c3..39273f25 100644 --- a/docs/development/roadmap/phases/phase-21.5-optimization/README.md +++ b/docs/development/roadmap/phases/phase-21.5-optimization/README.md @@ -4,7 +4,7 @@ - .hako 側(AotPrep)で前処理最適化(構造のみ)を行い、LLVM/AOT に渡すIRを軽量にする。 - 既定は挙動不変(opt‑in)。Return 純化ガードで安全性を担保。 -チェックリスト +チェックリスト(21.5 時点の着地) - [x] パス分割(StrlenFold / LoopHoist / ConstDedup / CollectionsHot / BinopCSE) - [x] CollectionsHot(Array/Map)導入(既定OFF) - [x] Map key モード `NYASH_AOT_MAP_KEY_MODE={h|i64|hh|auto}` @@ -17,6 +17,11 @@ - [ ] Idempotence(置換済みタグで再実行時も不変) - [ ] `arraymap`/`matmul` ≤ 125%(C基準) +メモ(21.5 クロージング) +- linidx/maplin など「線形インデックス+Array/Map」系は CollectionsHot + hoist/CSE で C≒100% 近辺まで到達。 +- arraymap は Array/Map 部分の externcall 化は進んだものの、文字列キー生成(toString/`\"k\"+idx`)と hash パスが支配的なため、C の単純 int[] とは根本的に前提が異なる状態で終了。 +- matmul は CollectionsHot 自体は単体では効いているが、行列積そのものが ArrayBox ベースであり、Core 数値箱不在のまま 80% 目標には届かず。これは 21.6 以降の「Core 数値箱+行列箱」導入で扱う。 + トグル - `NYASH_MIR_LOOP_HOIST=1` … StrlenFold/LoopHoist/ConstDedup/BinopCSE を有効化 - `NYASH_AOT_COLLECTIONS_HOT=1` … CollectionsHot(Array/Map) diff --git
a/docs/development/roadmap/phases/phase-21.6/core_numeric_boxes.md b/docs/development/roadmap/phases/phase-21.6/core_numeric_boxes.md new file mode 100644 index 00000000..c3a188c4 --- /dev/null +++ b/docs/development/roadmap/phases/phase-21.6/core_numeric_boxes.md @@ -0,0 +1,90 @@ +# Phase 21.6 — Core Numeric Boxes (Draft) + +Status: proposal (to refine at 21.6 kickoff) + +## Goal + +Provide explicit, low‑level numeric boxes that: + +- Give Nyash a “fair” core for int/f64 benchmarks against C. +- Stay compatible with the existing ArrayBox API (no breaking changes). +- Can be used both explicitly in `.hako` and (later) as conservative AotPrep targets. + +This phase focuses on design + minimal implementation; aggressive auto‑rewrites stay behind opt‑in flags. + +## Scope (21.6) + +- Design and add **IntArrayCore** numeric core (NyRT + Hako wrapper): + - NyRT: `IntArrayCore` box(Rust)with internal layout `Vec<i64>`(contiguous, row‑major semantics)。 + - Hako: `IntArrayCoreBox` in `nyash.core.numeric.intarray`, wrapping NyRT via externcall: + - `static new(len: i64) -> IntArrayCoreBox` → `nyash.intarray.new_h` + - `length(self) -> i64` → `nyash.intarray.len_h` + - `get_unchecked(self, idx: i64) -> i64` → `nyash.intarray.get_hi` + - `set_unchecked(self, idx: i64, v: i64)` → `nyash.intarray.set_hii` + - Semantics: i64‑only、固定長(構造変更なし)。境界チェックは NyRT 側(Fail‑Fast)に限定し、Hako 側は数値カーネル専用の薄いラッパーに留める。 + +- Design and add **MatI64** (matrix box) on top of IntArrayCore: + - Internal layout: `rows: i64`, `cols: i64`, `stride: i64`, `core: IntArrayCoreBox`. + - Minimal API: + - `new(rows: i64, cols: i64) -> MatI64` + - `rows(self) -> i64`, `cols(self) -> i64` + - `at(self, r: i64, c: i64) -> i64` + - `set(self, r: i64, c: i64, v: i64)` + - Provide one reference implementation: + - `MatOps.matmul_naive(a: MatI64, b: MatI64) -> MatI64` (O(n³), clear structure, not tuned). + +- Bench alignment: + - Add `matmul_core` benchmark: + - Nyash: MatI64 + IntArrayCore implementation. 
+ - C: struct `{ int64_t *ptr; int64_t rows; int64_t cols; int64_t stride; }` + helper `get/set`. + - Keep existing `matmul` (ArrayBox vs raw `int*`) as “language‑level” benchmark. + +Out of scope for 21.6: + +- Auto‑rewrite from `ArrayBox` → `IntArrayCore` / `MatI64` in AotPrep (only sketched, not default). +- SIMD / blocked matmul / cache‑tuned kernels (can be separate optimization phases). +- f64/complex variants (only type skeletons, if any). + +## Design Notes + +- **Layering** + - Core: IntArrayCore (and future F64ArrayCore) are “muscle” boxes: minimal, numeric‑only. NyRT では IntArrayCore(Rust)、Hako では IntArrayCoreBox として露出。 + - Matrix: MatI64 expresses 2D shape and indexing; it owns an IntArrayCoreBox. + - High‑level: ArrayBox / MapBox / existing user APIs remain unchanged. + +- **Hako ABI vs Nyash implementation** + - IntArrayCore lives as a NyRT box (C/Rust implementation) exposed via Hako ABI (`nyash.intarray.*`). + - IntArrayCoreBox, MatI64 and MatOps are written in Nyash, calling IntArrayCore via externcall while exposing boxcall APIs to user code. + - This keeps heavy lifting in NyRT while keeping the 2D semantics in `.hako`. + +- **Fair C comparison** + - For `matmul_core`, C should mirror IntArrayCore/MatI64: + - Same struct layout (ptr + len / rows + cols + stride). + - Same naive O(n³) algorithm. + - This separates: + - “Nyash vs C as languages” → existing `matmul` (ArrayBox vs `int*`). + - “Core numeric kernel parity” → new `matmul_core` (IntArrayCore vs equivalent C). + +## AotPrep / Future Work (21.6+) + +Not for default in 21.6, but to keep in mind: + +- Add conservative patterns in Collections/AotPrep to detect: + - `ArrayBox` with: + - Fixed length. + - No structural mutations after initialization. + - Access patterns of the form `base + i*cols + j` (or similar linear forms). + - Allow opt‑in rewrite from such patterns to IntArrayCore/MatI64 calls. + +- Keep all auto‑rewrites: + - Behind env toggles (e.g. `NYASH_AOT_INTARRAY_CORE=1`). 
+ - Semantics‑preserving by construction; fall back to ArrayBox path when unsure. + +## Open Questions for 21.6 Kickoff + +- Exact module names: + - `nyash.core.intarray` / `nyash.core.matrix` vs `nyash.linalg.*`. +- Bounds checking policy for IntArrayCore: + - Always on (fail‑fast) vs dev toggle for light checks in hot loops. +- Interop: + - Whether MatI64 should expose its IntArrayCore (e.g. `as_core_row_major()`) for advanced users. diff --git a/docs/development/roadmap/phases/phase-21.8/README.md b/docs/development/roadmap/phases/phase-21.8/README.md new file mode 100644 index 00000000..40e30a36 --- /dev/null +++ b/docs/development/roadmap/phases/phase-21.8/README.md @@ -0,0 +1,85 @@ +# Phase 21.8 — Numeric Core Integration & Builder Support + +Status: proposal (to hand off to Claude Code) + +## Goal + +Integrate the new numeric core boxes (IntArrayCore + MatI64) into the Hakorune selfhost chain so that: + +- Stage‑B → MirBuilder → ny‑llvmc(crate) can emit MIR(JSON) and EXE for code that uses: + - `using nyash.core.numeric.intarray as IntArrayCore` + - `using nyash.core.numeric.matrix_i64 as MatI64` +- The `matmul_core` microbench (MatI64 + IntArrayCore) runs end‑to‑end in EXE mode and can be compared fairly against a matching C implementation. + +21.6 provides the core boxes; 21.8 focuses on wiring them into the builder/runtime chain without changing default behaviour for other code. + +## Scope (21.8, this host) + +- Stage‑B / MirBuilder: + - Ensure `MatI64` and `IntArrayCore` are recognized as valid boxes when referenced via: + - `using nyash.core.numeric.matrix_i64 as MatI64` + - `using nyash.core.numeric.intarray as IntArrayCore` + - Fix the current provider‑emit failure: + - Error today: `[mirbuilder/parse/error] undefined variable: MatI64` during `env.mirbuilder.emit`. + - Diagnose and adjust Stage‑B / MirBuilder so that static box references (`MatI64.new`, `A.mul_naive`) compile in the same way as other boxes. 
+ +- AotPrep / emit pipeline: + - Keep AotPrep unchanged for now; the goal is to make `tools/hakorune_emit_mir.sh` succeed on `matmul_core` sources without special‑casing. + - Ensure `tools/hakorune_emit_mir.sh` with: + - `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1` + - can emit valid MIR(JSON) for MatI64/IntArrayCore code. + +- Microbench integration: + - Finish wiring `matmul_core` in `tools/perf/microbench.sh`: + - Hako side: MatI64/IntArrayCore based O(n³) matmul (`MatI64.mul_naive`). + - C side: `MatI64Core { int64_t *ptr; rows; cols; stride; }` with identical algorithm. + - Accept that performance may still be far from the 80% target; 21.8 focuses on **structural integration and parity**, not tuning. + +Out of scope: + +- New optimizations inside AotPrep / CollectionsHot. +- SIMD/blocked matmul kernels (to be handled in a later optimization phase). +- f64/complex matrix variants. + +## Tasks for implementation (Claude Code) + +1) **Fix MatI64 visibility in Stage‑B / MirBuilder** + - Reproduce the current failure: + - Use a small `.hako` like: + - `using nyash.core.numeric.matrix_i64 as MatI64` + - `static box Main { method main(args) { local n = 4; local A = MatI64.new(n,n); return A.at(0,0); } }` + - Confirm `env.mirbuilder.emit` reports `undefined variable: MatI64`. + - Investigate how modules from `nyash.toml` (`"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako"`) are made visible to Stage‑B and MirBuilder. + - Adjust the resolver / module prelude so that `MatI64` (and `IntArrayCore`) are treated like other core boxes: + - Either via explicit prelude inclusion, + - Or via module registry entries consumed by the builder. 
+ +2) **Ensure `tools/hakorune_emit_mir.sh` can emit MIR(JSON) for matmul_core** + - Once MatI64 is visible, run: + - `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1 tools/hakorune_emit_mir.sh <matmul_core.hako> tmp/matmul_core.json` + - Acceptance: + - No `undefined variable: MatI64` / `IntArrayCore` errors. + - `tmp/matmul_core.json` is valid MIR(JSON) (same schema as existing matmul case). + +3) **Finish `matmul_core` microbench** + - Use the existing skeleton in `tools/perf/microbench.sh` (`case matmul_core`): + - Confirm Hako side compiles and runs under `--backend vm`. + - Confirm EXE path works: + - `NYASH_SKIP_TOML_ENV=1 NYASH_LLVM_SKIP_BUILD=1 tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64` + - Update `benchmarks/README.md`: + - Add `matmul_core` row with a short description: + - “MatI64/IntArrayCore vs MatI64Core C struct (ptr+rows+cols+stride)” + - Record initial ratios (even if far from 80%). + +4) **Keep existing behaviour stable** + - No changes to default user behaviour, env toggles, or existing benches beyond adding `matmul_core`. + - Ensure quick/profile smokes (where applicable) remain green with numeric core present. + +## Notes + +- 21.6 already introduced: + - NyRT `IntArrayCore` (Vec<i64> + RwLock) and handle‑based externs (`nyash.intarray.*`). + - Hako wrappers `IntArrayCore` and `MatI64` in `lang/src/runtime/numeric/`. + - `nyash.toml` module aliases for `nyash.core.numeric.intarray` and `nyash.core.numeric.matrix_i64`. +- 21.8 is about wiring these into the builder/emit chain so that Hakorune can compile and benchmark numeric core code end‑to‑end. + diff --git a/hako.toml b/hako.toml new file mode 100644 index 00000000..2cc3b60c --- /dev/null +++ b/hako.toml @@ -0,0 +1,4 @@ +[env] +# This file mirrors nyash.toml for Hakorune/Hako tools. +# Primary runtime config remains nyash.toml; hako.toml exists as a dev-facing alias. 
+ diff --git a/lang/src/runtime/numeric/intarray_core_box.hako b/lang/src/runtime/numeric/intarray_core_box.hako new file mode 100644 index 00000000..32414bfa --- /dev/null +++ b/lang/src/runtime/numeric/intarray_core_box.hako @@ -0,0 +1,28 @@ +// IntArrayCoreBox — thin Hako wrapper over NyRT IntArrayCore (handle-based) +// This provides a box-level API for numeric kernels while delegating storage +// to nyash.intarray.* extern calls in NyRT (ring1). + +static box IntArrayCore { + init { handle, len } + + static new(len) { + local h = externcall "nyash.intarray.new_h"(len) + local b = new IntArrayCore() + b.handle = h + b.len = len // NOTE(review): caches the requested len; NyRT new_h clamps len <= 0 to an empty buffer, so this can differ from length() + return b + } + + length() { + return externcall "nyash.intarray.len_h"(me.handle) + } + + get_unchecked(idx) { + return externcall "nyash.intarray.get_hi"(me.handle, idx) // NyRT get_hi yields 0 for an unknown handle or out-of-range idx + } + + set_unchecked(idx, v) { + externcall "nyash.intarray.set_hii"(me.handle, idx, v) // NOTE(review): the 0/1 status returned by set_hii is discarded here + return null + } +} diff --git a/lang/src/runtime/numeric/mat_i64_box.hako b/lang/src/runtime/numeric/mat_i64_box.hako new file mode 100644 index 00000000..053c43a0 --- /dev/null +++ b/lang/src/runtime/numeric/mat_i64_box.hako @@ -0,0 +1,65 @@ +// MatI64 — simple i64 matrix box built on top of IntArrayCore. +// Internal layout: rows, cols, stride, core (IntArrayCore). 
+ +using nyash.core.numeric.intarray as IntArrayCore + +static box MatI64 { + init { rows, cols, stride, core } + + static new(rows, cols) { + local total = rows * cols + local core = IntArrayCore.new(total) + local m = new MatI64() + m.rows = rows + m.cols = cols + m.stride = cols // row stride equals cols: rows are stored back to back + m.core = core + return m + } + + rowsCount() { + return me.rows + } + + colsCount() { + return me.cols + } + + at(r, c) { + local idx = r * me.stride + c + return me.core.get_unchecked(idx) + } + + set(r, c, v) { + local idx = r * me.stride + c + me.core.set_unchecked(idx, v) + return null + } + + // Naive O(n^3) matmul: this * b + mul_naive(b) { + local n = me.rows + local mcols = me.cols + local bcols = b.cols + // assume shapes are compatible and square for now (Phase 21.6 draft); NOTE(review): me.cols vs b.rows is never checked + local out = MatI64.new(n, bcols) // accumulated into below, so it must start zeroed (NyRT new_h zero-fills) + local i = 0 + loop(i < n) { + local k = 0 + loop(k < mcols) { + local aik = me.at(i, k) + local j = 0 + loop(j < bcols) { + local idx = i * out.stride + j + local v = out.core.get_unchecked(idx) + aik * b.at(k, j) + out.core.set_unchecked(idx, v) + j = j + 1 + } + k = k + 1 + } + i = i + 1 + } + return out + } +} + diff --git a/nyash.toml b/nyash.toml index 1278c437..79fe4134 100644 --- a/nyash.toml +++ b/nyash.toml @@ -279,6 +279,10 @@ path = "lang/src/shared/common/string_helpers.hako" # Temporary alias keys removed (Phase‑20.33 TTL reached). Use `selfhost.shared.*` above. +# Numeric core boxes (Phase 21.6) +"nyash.core.numeric.intarray" = "lang/src/runtime/numeric/intarray_core_box.hako" +"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako" + # v2 Plugin libraries (loader reads these for TypeBox ABI) [libraries] [libraries."libnyash_filebox_plugin.so"] diff --git a/tools/perf/microbench.sh b/tools/perf/microbench.sh index eeb371a7..90f23d22 100644 --- a/tools/perf/microbench.sh +++ b/tools/perf/microbench.sh @@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" BIN="$ROOT/target/release/hakorune" -usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; } +usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|matmul_core|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; } CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0 while [[ $# -gt 0 ]]; do @@ -562,6 +562,91 @@ C rm -f "$TMP_CHECK_JSON" 2>/dev/null || true fi ;; + matmul_core) + # Core numeric matmul using MatI64 + IntArrayCore + # Use smaller default N to keep runtime reasonable + if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then + N=256 + fi + HAKO_FILE=$(mktemp_hako) + cat >"$HAKO_FILE" <"$C_FILE" <<'C' +#include +#include + +typedef struct { + int64_t *ptr; + int64_t rows; + int64_t cols; + int64_t stride; +} MatI64Core; + +static inline int64_t mat_get(MatI64Core *m, int64_t r, int64_t c) { + return m->ptr[r * m->stride + c]; +} + +static inline void mat_set(MatI64Core *m, int64_t r, int64_t c, int64_t v) { + m->ptr[r * m->stride + c] = v; +} + +int main() { + int64_t n = N_PLACEHOLDER; + int64_t total = n * n; + MatI64Core A, B, C; + A.rows = B.rows = C.rows = n; + A.cols = B.cols = C.cols = n; + A.stride = B.stride = C.stride = n; + A.ptr = (int64_t*)malloc(sizeof(int64_t)*total); + B.ptr = (int64_t*)malloc(sizeof(int64_t)*total); + C.ptr = (int64_t*)malloc(sizeof(int64_t)*total); + for (int64_t idx = 0; idx < total; idx++) { + A.ptr[idx] = idx % 97; + B.ptr[idx] = (idx * 3) % 101; + C.ptr[idx] = 0; + } + for (int64_t i = 0; i < n; i++) { + for (int64_t k = 0; k < n; k++) { + int64_t aik = mat_get(&A, i, k); + for (int64_t j = 0; j < n; j++) { + int64_t idx = i * C.stride + j; + int64_t v = C.ptr[idx] + aik * mat_get(&B, k, j); + C.ptr[idx] = v; + } + } + } + int64_t r = mat_get(&C, n-1, n-1); + free(A.ptr); 
free(B.ptr); free(C.ptr); + return (int)(r & 0xFF); +} +C + sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE" + ;; linidx) # Linear index pattern: idx = i*cols + j # Derive rows/cols from N to keep runtime stable