feat(perf): add Phase 21.8 foundation for IntArrayCore/MatI64 numeric boxes

Prepare infrastructure for specialized numeric array benchmarking:
- Add IntArrayCore plugin stub (crates/nyash_kernel/src/plugin/intarray.rs)
- Add IntArrayCore/MatI64 box definitions (lang/src/runtime/numeric/)
- Add Phase 21.8 documentation and task tracking
- Update nyash.toml/hako.toml with numeric library configuration
- Extend microbench.sh for matmul_core benchmark case

Next: Resolve Stage-B MirBuilder to recognize MatI64/IntArrayCore as boxes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
nyash-codex
2025-11-14 15:18:14 +09:00
parent f1fa182a4b
commit 8214176814
11 changed files with 549 additions and 3 deletions

View File

@ -1,4 +1,23 @@
# Current Task — Phase 21.7 (Normalization & Unification: Methodize Static Boxes)
# Current Task — Phase 21.8 (Numeric Core Integration & Builder Support)
Update (2025-11-14 — 21.8 kickoff: MatI64/IntArrayCore builder integration)
- Context:
- 21.5: AotPrep/CollectionsHot v1 + microbench整備まで完了(linidx/maplin ≒ C=100%)。arraymap/matmul は次フェーズ送り。
- 21.6: NyRT IntArrayCore + Hako IntArrayCore/MatI64/matmul_core スケルトン実装まで完了(builder 経路未対応)。
- Current goal:
- Hakorune selfhost chain(Stage-B → MirBuilder → ny-llvmc(crate))に IntArrayCore/MatI64 を統合し、`matmul_core` ベンチを EXE ラインで実行できるようにする。
- 実装は Claude Code 担当、このホストは仕様・構造・診断の整理に専念。
Planned tasks (for Claude Code)
1) Fix MatI64 visibility in Stage-B / MirBuilder
- Reproduce provider error: `[mirbuilder/parse/error] undefined variable: MatI64` from `env.mirbuilder.emit` when compiling a small test using `using nyash.core.numeric.matrix_i64 as MatI64`.
- Wire `nyash.core.numeric.matrix_i64` / `nyash.core.numeric.intarray` modules into the resolver/prelude so that Stage-B/MirBuilder can see MatI64 and IntArrayCore like other core boxes.
2) Make `tools/hakorune_emit_mir.sh` emit MIR(JSON) for `matmul_core`
- With `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1`, emit MIR(JSON) for the `matmul_core` case in microbench and ensure no undefined-variable errors.
3) Finish `matmul_core` bench EXE path
- Confirm `tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64` builds EXE and runs, logging ratio vs the matching C implementation.
4) Keep defaults stable
- No behaviour changes for existing code/benches; IntArrayCore/MatI64 integration is additive and behind explicit use.
Update (2025-11-14 — CollectionsHot rewrite expansion, waiting for Claude Code)
- Status: pending (waiting on Claude Code to land rewrite coverage improvements)

View File

@ -0,0 +1,159 @@
// IntArrayCore helpers for AOT/VM bridge (handle-based, ring1 numeric core)
// API (Hako-facing via externcall):
// - nyash.intarray.new_h(len) -> handle (IntArrayCore)
// - nyash.intarray.len_h(h) -> i64
// - nyash.intarray.get_hi(h,i) -> i64
// - nyash.intarray.set_hii(h,i,v) -> i64 (0=ok, non-zero=error)
use nyash_rust::{
box_trait::{BoxCore, NyashBox, StringBox},
boxes::basic::BoolBox,
runtime::host_handles as handles,
};
use std::any::Any;
use std::sync::RwLock;
/// Minimal numeric core: contiguous i64 buffer + length.
///
/// This box is intended only for internal numeric kernels (e.g. `matmul_core`);
/// general-purpose APIs are expected to go through the `.hako`-side wrappers
/// (MatI64 etc.).
#[derive(Debug)]
pub struct IntArrayCore {
// Common box header; the `BoxCore` impl reads the box id and parent type id from it.
base: nyash_rust::box_trait::BoxBase,
// Element storage, kept behind a reader/writer lock so `&self` methods can write to it.
data: RwLock<Vec<i64>>,
}
impl IntArrayCore {
    /// Converts a signed length/index into `usize`.
    ///
    /// Returns `None` for negative values and for values that do not fit the
    /// target's `usize`. A plain `as usize` cast would silently truncate on
    /// targets where `usize` is narrower than 64 bits, letting an out-of-range
    /// index alias a valid slot.
    fn to_usize(value: i64) -> Option<usize> {
        // Fully qualified so the call resolves regardless of crate edition/prelude.
        <usize as std::convert::TryFrom<i64>>::try_from(value).ok()
    }

    /// Creates a zero-filled array holding `len` elements.
    ///
    /// A non-positive `len`, or one that cannot be represented as `usize`,
    /// yields an empty array.
    pub fn new(len: i64) -> Self {
        let n = Self::to_usize(len).unwrap_or(0);
        IntArrayCore {
            base: nyash_rust::box_trait::BoxBase::new(),
            data: RwLock::new(vec![0; n]),
        }
    }

    /// Returns the number of elements as `i64`.
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn len_i64(&self) -> i64 {
        self.data.read().unwrap().len() as i64
    }

    /// Returns the element at `idx`, or `None` when `idx` is negative or out of range.
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn get_i64(&self, idx: i64) -> Option<i64> {
        let i = Self::to_usize(idx)?;
        self.data.read().unwrap().get(i).copied()
    }

    /// Stores `v` at `idx`.
    ///
    /// Returns `true` on success and `false` when `idx` is negative or out of
    /// range; the buffer is never resized.
    ///
    /// Panics if the internal lock has been poisoned.
    pub fn set_i64(&self, idx: i64, v: i64) -> bool {
        let i = match Self::to_usize(idx) {
            Some(i) => i,
            None => return false,
        };
        match self.data.write().unwrap().get_mut(i) {
            Some(slot) => {
                *slot = v;
                true
            }
            None => false,
        }
    }
}
/// `BoxCore` plumbing for [`IntArrayCore`]: identity comes from the embedded
/// `BoxBase`, and formatting prints only the element count.
impl BoxCore for IntArrayCore {
    /// Returns the id stored in the embedded `BoxBase`.
    fn box_id(&self) -> u64 {
        self.base.id
    }

    /// Returns the parent type id recorded in the embedded `BoxBase`, if any.
    fn parent_type_id(&self) -> Option<std::any::TypeId> {
        self.base.parent_type_id
    }

    /// Writes `IntArrayCore(len=N)`.
    fn fmt_box(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // `data` is an `RwLock<Vec<i64>>`; the lock itself has no `len()`, so the
        // length has to be read through the locking accessor.
        write!(f, "IntArrayCore(len={})", self.len_i64())
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
/// `NyashBox` behaviour for [`IntArrayCore`]: value-style equality and
/// deep-copy cloning.
impl NyashBox for IntArrayCore {
    /// Returns a `StringBox` containing `IntArrayCore(len=N)`.
    fn to_string_box(&self) -> StringBox {
        // `data` is an `RwLock`; read the length through the locking accessor.
        StringBox::new(&format!("IntArrayCore(len={})", self.len_i64()))
    }

    /// Element-wise equality against another `IntArrayCore`; any other box
    /// type compares as not equal.
    fn equals(&self, other: &dyn NyashBox) -> BoolBox {
        if let Some(o) = other.as_any().downcast_ref::<IntArrayCore>() {
            // Same object: trivially equal. This also avoids taking a second
            // read lock on the same `RwLock` from one thread, which the
            // standard library documents as possibly panicking or deadlocking.
            if std::ptr::eq(self, o) {
                return BoolBox::new(true);
            }
            // `RwLock` does not implement `PartialEq`; compare the guarded vectors.
            let lhs = self.data.read().unwrap();
            let rhs = o.data.read().unwrap();
            BoolBox::new(*lhs == *rhs)
        } else {
            BoolBox::new(false)
        }
    }

    /// Returns a new box with a copy of the base header and a copy of the
    /// current contents behind a fresh lock.
    fn clone_box(&self) -> Box<dyn NyashBox> {
        Box::new(IntArrayCore {
            base: self.base.clone(),
            data: RwLock::new(self.data.read().unwrap().clone()),
        })
    }

    /// Identity semantics are not required here, so sharing is implemented as
    /// a plain clone.
    fn share_box(&self) -> Box<dyn NyashBox> {
        self.clone_box()
    }
}
// --- Extern API (handle-based) ---
/// Looks up the boxed object registered under `handle`.
///
/// Handles that are zero or negative are rejected up front and never reach
/// `handles::get`.
fn get_core(handle: i64) -> Option<std::sync::Arc<dyn NyashBox>> {
    if handle > 0 {
        handles::get(handle as u64)
    } else {
        None
    }
}
#[export_name = \"nyash.intarray.new_h\"]
pub extern \"C\" fn nyash_intarray_new_h(len: i64) -> i64 {
let core = IntArrayCore::new(len);
let arc: std::sync::Arc<dyn NyashBox> = std::sync::Arc::new(core);
let h = handles::to_handle_arc(arc) as i64;
if std::env::var(\"NYASH_CLI_VERBOSE\").ok().as_deref() == Some(\"1\") {
eprintln!(\"[INTARRAY] new_h(len={}) -> handle={}\", len, h);
}
h
}
#[export_name = \"nyash.intarray.len_h\"]
pub extern \"C\" fn nyash_intarray_len_h(handle: i64) -> i64 {
if let Some(obj) = get_core(handle) {
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
return core.len_i64();
}
}
0
}
#[export_name = \"nyash.intarray.get_hi\"]
pub extern \"C\" fn nyash_intarray_get_hi(handle: i64, idx: i64) -> i64 {
if let Some(obj) = get_core(handle) {
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
if let Some(v) = core.get_i64(idx) {
return v;
}
}
}
0
}
#[export_name = \"nyash.intarray.set_hii\"]
pub extern \"C\" fn nyash_intarray_set_hii(handle: i64, idx: i64, val: i64) -> i64 {
if let Some(obj) = get_core(handle) {
if let Some(core) = obj.as_any().downcast_ref::<IntArrayCore>() {
return if core.set_i64(idx, val) { 0 } else { 1 };
}
}
1
}

View File

@ -6,6 +6,7 @@ pub mod instance;
pub mod invoke;
pub mod invoke_core;
pub mod map;
pub mod intarray;
pub mod semantics;
pub mod string;
@ -17,5 +18,6 @@ pub use instance::*;
pub use invoke::*;
pub use invoke_core::*;
pub use map::*;
pub use intarray::*;
pub use semantics::*;
pub use string::*;

View File

@ -4,7 +4,7 @@
- .hako 側(AotPrep)で前処理最適化(構造のみ)を行い、LLVM/AOT に渡すIRを軽量にする。
- 既定は挙動不変(opt-in)。Return 純化ガードで安全性を担保。
チェックリスト
チェックリスト(21.5 時点の着地)
- [x] パス分割(StrlenFold / LoopHoist / ConstDedup / CollectionsHot / BinopCSE)
- [x] CollectionsHot(Array/Map)導入(既定OFF)
- [x] Map key モード `NYASH_AOT_MAP_KEY_MODE={h|i64|hh|auto}`
@ -17,6 +17,11 @@
- [ ] Idempotence(置換済みタグで再実行時も不変)
- [ ] `arraymap`/`matmul` ≤ 125%(C基準)
メモ(21.5 クロージング)
- linidx/maplin など「線形インデックスArray/Map」系は CollectionsHot + hoist/CSE で C≒100% 近辺まで到達。
- arraymap は Array/Map 部分の externcall 化は進んだものの、文字列キー生成(toString/`"k"+idx`)と hash パスが支配的なため、C の単純 int[] とは根本的に前提が異なる状態で終了。
- matmul は CollectionsHot 自体は単体では効いているが、行列積そのものが ArrayBox ベースであり、Core 数値箱不在のまま 80% 目標には届かず。これは 21.6 以降の「Core 数値箱+行列箱」導入で扱う。
トグル
- `NYASH_MIR_LOOP_HOIST=1` … StrlenFold/LoopHoist/ConstDedup/BinopCSE を有効化
- `NYASH_AOT_COLLECTIONS_HOT=1` … CollectionsHot(Array/Map)

View File

@ -0,0 +1,90 @@
# Phase 21.6 — Core Numeric Boxes (Draft)
Status: proposal (to refine at 21.6 kickoff)
## Goal
Provide explicit, low-level numeric boxes that:
- Give Nyash a “fair” core for int/f64 benchmarks against C.
- Stay compatible with the existing ArrayBox API (no breaking changes).
- Can be used both explicitly in `.hako` and (later) as conservative AotPrep targets.
This phase focuses on design + minimal implementation; aggressive auto-rewrites stay behind opt-in flags.
## Scope (21.6)
- Design and add **IntArrayCore** numeric core (NyRT + Hako wrapper):
- NyRT: `IntArrayCore` box (Rust) with internal layout `Vec<i64>` (contiguous, row-major semantics).
- Hako: `IntArrayCoreBox` in `nyash.core.numeric.intarray`, wrapping NyRT via externcall:
- `static new(len: i64) -> IntArrayCoreBox` → `nyash.intarray.new_h`
- `length(self) -> i64` → `nyash.intarray.len_h`
- `get_unchecked(self, idx: i64) -> i64` → `nyash.intarray.get_hi`
- `set_unchecked(self, idx: i64, v: i64)` → `nyash.intarray.set_hii`
- Semantics: i64-only、固定長(構造変更なし)。境界チェックは NyRT 側(Fail-Fast)に限定し、Hako 側は数値カーネル専用の薄いラッパーに留める。
- Design and add **MatI64** (matrix box) on top of IntArrayCore:
- Internal layout: `rows: i64`, `cols: i64`, `stride: i64`, `core: IntArrayCoreBox`.
- Minimal API:
- `new(rows: i64, cols: i64) -> MatI64`
- `rows(self) -> i64`, `cols(self) -> i64`
- `at(self, r: i64, c: i64) -> i64`
- `set(self, r: i64, c: i64, v: i64)`
- Provide one reference implementation:
- `MatOps.matmul_naive(a: MatI64, b: MatI64) -> MatI64` (O(n³), clear structure, not tuned).
- Bench alignment:
- Add `matmul_core` benchmark:
- Nyash: MatI64 + IntArrayCore implementation.
- C: struct `{ int64_t *ptr; int64_t rows; int64_t cols; int64_t stride; }` + helper `get/set`.
- Keep existing `matmul` (ArrayBox vs raw `int*`) as “language-level” benchmark.
Out of scope for 21.6:
- Auto-rewrite from `ArrayBox` → `IntArrayCore` / `MatI64` in AotPrep (only sketched, not default).
- SIMD / blocked matmul / cache-tuned kernels (can be separate optimization phases).
- f64/complex variants (only type skeletons, if any).
## Design Notes
- **Layering**
- Core: IntArrayCore (and future F64ArrayCore) are “muscle” boxes: minimal, numeric-only. NyRT では IntArrayCore(Rust)、Hako では IntArrayCoreBox として露出。
- Matrix: MatI64 expresses 2D shape and indexing; it owns an IntArrayCoreBox.
- High-level: ArrayBox / MapBox / existing user APIs remain unchanged.
- **Hako ABI vs Nyash implementation**
- IntArrayCore lives as a NyRT box (C/Rust implementation) exposed via Hako ABI (`nyash.intarray.*`).
- IntArrayCoreBox, MatI64 and MatOps are written in Nyash, calling IntArrayCore via externcall while exposing boxcall APIs to user code.
- This keeps heavy lifting in NyRT while keeping the 2D semantics in `.hako`.
- **Fair C comparison**
- For `matmul_core`, C should mirror IntArrayCore/MatI64:
- Same struct layout (ptr + len / rows + cols + stride).
- Same naive O(n³) algorithm.
- This separates:
- “Nyash vs C as languages” → existing `matmul` (ArrayBox vs `int*`).
- “Core numeric kernel parity” → new `matmul_core` (IntArrayCore vs equivalent C).
## AotPrep / Future Work (21.6+)
Not for default in 21.6, but to keep in mind:
- Add conservative patterns in Collections/AotPrep to detect:
- `ArrayBox<i64>` with:
- Fixed length.
- No structural mutations after initialization.
- Access patterns of the form `base + i*cols + j` (or similar linear forms).
- Allow opt-in rewrite from such patterns to IntArrayCore/MatI64 calls.
- Keep all auto-rewrites:
- Behind env toggles (e.g. `NYASH_AOT_INTARRAY_CORE=1`).
- Semantics-preserving by construction; fall back to ArrayBox path when unsure.
## Open Questions for 21.6 Kickoff
- Exact module names:
- `nyash.core.intarray` / `nyash.core.matrix` vs `nyash.linalg.*`.
- Bounds checking policy for IntArrayCore:
- Always on (fail-fast) vs dev toggle for light checks in hot loops.
- Interop:
- Whether MatI64 should expose its IntArrayCore (e.g. `as_core_row_major()`) for advanced users.

View File

@ -0,0 +1,85 @@
# Phase 21.8 — Numeric Core Integration & Builder Support
Status: proposal (to hand off to Claude Code)
## Goal
Integrate the new numeric core boxes (IntArrayCore + MatI64) into the Hakorune selfhost chain so that:
- Stage-B → MirBuilder → ny-llvmc(crate) can emit MIR(JSON) and EXE for code that uses:
- `using nyash.core.numeric.intarray as IntArrayCore`
- `using nyash.core.numeric.matrix_i64 as MatI64`
- The `matmul_core` microbench (MatI64 + IntArrayCore) runs end-to-end in EXE mode and can be compared fairly against a matching C implementation.
21.6 provides the core boxes; 21.8 focuses on wiring them into the builder/runtime chain without changing default behaviour for other code.
## Scope (21.8, this host)
- Stage-B / MirBuilder:
- Ensure `MatI64` and `IntArrayCore` are recognized as valid boxes when referenced via:
- `using nyash.core.numeric.matrix_i64 as MatI64`
- `using nyash.core.numeric.intarray as IntArrayCore`
- Fix the current provider-emit failure:
- Error today: `[mirbuilder/parse/error] undefined variable: MatI64` during `env.mirbuilder.emit`.
- Diagnose and adjust Stage-B / MirBuilder so that static box references (`MatI64.new`, `A.mul_naive`) compile in the same way as other boxes.
- AotPrep / emit pipeline:
- Keep AotPrep unchanged for now; the goal is to make `tools/hakorune_emit_mir.sh` succeed on `matmul_core` sources without special-casing.
- Ensure `tools/hakorune_emit_mir.sh` with:
- `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1`
- can emit valid MIR(JSON) for MatI64/IntArrayCore code.
- Microbench integration:
- Finish wiring `matmul_core` in `tools/perf/microbench.sh`:
- Hako side: MatI64/IntArrayCore based O(n³) matmul (`MatI64.mul_naive`).
- C side: `MatI64Core { int64_t *ptr; rows; cols; stride; }` with identical algorithm.
- Accept that performance may still be far from the 80% target; 21.8 focuses on **structural integration and parity**, not tuning.
Out of scope:
- New optimizations inside AotPrep / CollectionsHot.
- SIMD/blocked matmul kernels (to be handled in a later optimization phase).
- f64/complex matrix variants.
## Tasks for implementation (Claude Code)
1) **Fix MatI64 visibility in Stage-B / MirBuilder**
- Reproduce the current failure:
- Use a small `.hako` like:
- `using nyash.core.numeric.matrix_i64 as MatI64`
- `static box Main { method main(args) { local n = 4; local A = MatI64.new(n,n); return A.at(0,0); } }`
- Confirm `env.mirbuilder.emit` reports `undefined variable: MatI64`.
- Investigate how modules from `nyash.toml` (`"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako"`) are made visible to Stage-B and MirBuilder.
- Adjust the resolver / module prelude so that `MatI64` (and `IntArrayCore`) are treated like other core boxes:
- Either via explicit prelude inclusion,
- Or via module registry entries consumed by the builder.
2) **Ensure `tools/hakorune_emit_mir.sh` can emit MIR(JSON) for matmul_core**
- Once MatI64 is visible, run:
- `HAKO_APPLY_AOT_PREP=1 NYASH_AOT_COLLECTIONS_HOT=1 NYASH_LLVM_FAST=1 NYASH_MIR_LOOP_HOIST=1 NYASH_JSON_ONLY=1 tools/hakorune_emit_mir.sh <matmul_core.hako> tmp/matmul_core.json`
- Acceptance:
- No `undefined variable: MatI64` / `IntArrayCore` errors.
- `tmp/matmul_core.json` is valid MIR(JSON) (same schema as existing matmul case).
3) **Finish `matmul_core` microbench**
- Use the existing skeleton in `tools/perf/microbench.sh` (`case matmul_core`):
- Confirm Hako side compiles and runs under `--backend vm`.
- Confirm EXE path works:
- `NYASH_SKIP_TOML_ENV=1 NYASH_LLVM_SKIP_BUILD=1 tools/perf/microbench.sh --case matmul_core --backend llvm --exe --runs 1 --n 64`
- Update `benchmarks/README.md`:
- Add `matmul_core` row with a short description:
- “MatI64/IntArrayCore vs MatI64Core C struct (ptr+rows+cols+stride)”
- Record initial ratios (even if far from 80%).
4) **Keep existing behaviour stable**
- No changes to default user behaviour, env toggles, or existing benches beyond adding `matmul_core`.
- Ensure quick/profile smokes (where applicable) remain green with numeric core present.
## Notes
- 21.6 already introduced:
- NyRT `IntArrayCore` (`Vec<i64>` + `RwLock`) and handle-based externs (`nyash.intarray.*`).
- Hako wrappers `IntArrayCore` and `MatI64` in `lang/src/runtime/numeric/`.
- `nyash.toml` module aliases for `nyash.core.numeric.intarray` and `nyash.core.numeric.matrix_i64`.
- 21.8 is about wiring these into the builder/emit chain so that Hakorune can compile and benchmark numeric core code end-to-end.

4
hako.toml Normal file
View File

@ -0,0 +1,4 @@
[env]
# This file mirrors nyash.toml for Hakorune/Hako tools.
# Primary runtime config remains nyash.toml; hako.toml exists as a dev-facing alias.

View File

@ -0,0 +1,28 @@
// IntArrayCoreBox — thin Hako wrapper over NyRT IntArrayCore (handle-based)
// This provides a box-level API for numeric kernels while delegating storage
// to nyash.intarray.* extern calls in NyRT (ring1).
static box IntArrayCore {
// handle: value returned by nyash.intarray.new_h; len: the length requested at construction.
init { handle, len }
// Allocates storage through nyash.intarray.new_h and returns a new box carrying the handle.
static new(len) {
local h = externcall "nyash.intarray.new_h"(len)
local b = new IntArrayCore()
b.handle = h
// Stores the requested length exactly as passed in; length() below asks the
// runtime instead of reading this field.
b.len = len
return b
}
// Returns whatever nyash.intarray.len_h reports for this box's handle.
length() {
return externcall "nyash.intarray.len_h"(me.handle)
}
// Forwards to nyash.intarray.get_hi with this box's handle.
// NOTE(review): despite the name, range checking presumably happens on the NyRT side — confirm.
get_unchecked(idx) {
return externcall "nyash.intarray.get_hi"(me.handle, idx)
}
// Forwards to nyash.intarray.set_hii; the extern's return value is discarded
// and null is always returned.
set_unchecked(idx, v) {
externcall "nyash.intarray.set_hii"(me.handle, idx, v)
return null
}
}

View File

@ -0,0 +1,65 @@
// MatI64 — simple i64 matrix box built on top of IntArrayCore.
// Internal layout: rows, cols, stride, core (IntArrayCore).
using nyash.core.numeric.intarray as IntArrayCore
static box MatI64 {
init { rows, cols, stride, core }
// Builds a rows x cols matrix backed by one IntArrayCore of rows * cols elements.
static new(rows, cols) {
local total = rows * cols
local core = IntArrayCore.new(total)
local m = new MatI64()
m.rows = rows
m.cols = cols
// stride is set equal to cols, so element (r, c) lives at r * cols + c.
m.stride = cols
m.core = core
return m
}
// Returns the stored row count.
rowsCount() {
return me.rows
}
// Returns the stored column count.
colsCount() {
return me.cols
}
// Reads element (r, c) using the linear index r * stride + c.
// No range check on r or c is performed here.
at(r, c) {
local idx = r * me.stride + c
return me.core.get_unchecked(idx)
}
// Writes v to element (r, c) using the linear index r * stride + c; always returns null.
// No range check on r or c is performed here.
set(r, c, v) {
local idx = r * me.stride + c
me.core.set_unchecked(idx, v)
return null
}
// Naive O(n^3) matmul: this * b
// Returns a new MatI64 of shape me.rows x b.cols; neither operand is written to.
mul_naive(b) {
local n = me.rows
local mcols = me.cols
local bcols = b.cols
// assume shapes are compatible and square for now (Phase 21.6 draft)
// NOTE(review): the accumulation below reads out's current value before adding,
// so it relies on freshly created storage starting at 0 — confirm.
local out = MatI64.new(n, bcols)
local i = 0
// Loop order is i, k, j: a[i][k] is read once per (i, k) and reused across the j loop.
loop(i < n) {
local k = 0
loop(k < mcols) {
local aik = me.at(i, k)
local j = 0
loop(j < bcols) {
// out[i][j] += a[i][k] * b[k][j], going through out.core directly.
local idx = i * out.stride + j
local v = out.core.get_unchecked(idx) + aik * b.at(k, j)
out.core.set_unchecked(idx, v)
j = j + 1
}
k = k + 1
}
i = i + 1
}
return out
}
}

View File

@ -279,6 +279,10 @@ path = "lang/src/shared/common/string_helpers.hako"
# Temporary alias keys removed (Phase20.33 TTL reached). Use `selfhost.shared.*` above.
# Numeric core boxes (Phase 21.6)
"nyash.core.numeric.intarray" = "lang/src/runtime/numeric/intarray_core_box.hako"
"nyash.core.numeric.matrix_i64" = "lang/src/runtime/numeric/mat_i64_box.hako"
# v2 Plugin libraries (loader reads these for TypeBox ABI)
[libraries]
[libraries."libnyash_filebox_plugin.so"]

View File

@ -5,7 +5,7 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
BIN="$ROOT/target/release/hakorune"
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; }
usage() { echo "Usage: $0 --case {loop|strlen|box|branch|call|stringchain|arraymap|chip8|kilo|sieve|matmul|matmul_core|linidx|maplin} [--n N] [--runs R] [--backend {llvm|vm}] [--exe] [--budget-ms B]"; }
CASE="loop"; N=5000000; RUNS=5; BACKEND="llvm"; EXE_MODE=0; BUDGET_MS=0
while [[ $# -gt 0 ]]; do
@ -562,6 +562,91 @@ C
rm -f "$TMP_CHECK_JSON" 2>/dev/null || true
fi
;;
matmul_core)
# Core numeric matmul using MatI64 + IntArrayCore
# Use smaller default N to keep runtime reasonable
# (applies only in EXE mode, and only while N still holds the script-wide default of 5000000)
if [[ "$EXE_MODE" = "1" && "$N" = "5000000" ]]; then
N=256
fi
# Generate the Hako benchmark source. The heredoc delimiter is unquoted, so the
# shell expands ${N} inside it. mktemp_hako is defined elsewhere in this script.
HAKO_FILE=$(mktemp_hako)
cat >"$HAKO_FILE" <<HAKO
using nyash.core.numeric.matrix_i64 as MatI64
static box Main { method main(args) {
local n = ${N}
// Initialize A, B, C as n x n matrices
local A = MatI64.new(n, n)
local B = MatI64.new(n, n)
local C = MatI64.new(n, n)
local i = 0
loop(i < n) {
local j = 0
loop(j < n) {
local idx = i*n + j
A.set(i, j, idx % 97)
B.set(i, j, (idx * 3) % 101)
C.set(i, j, 0)
j = j + 1
}
i = i + 1
}
// Naive matmul via MatI64.mul_naive
local out = A.mul_naive(B)
return out.at(n-1, n-1)
} }
HAKO
# Generate the C counterpart. This heredoc delimiter is quoted, so nothing is
# expanded here; N_PLACEHOLDER is substituted by the sed call after the heredoc.
# mktemp_c is defined elsewhere in this script.
C_FILE=$(mktemp_c)
cat >"$C_FILE" <<'C'
#include <stdint.h>
#include <stdlib.h>
typedef struct {
int64_t *ptr;
int64_t rows;
int64_t cols;
int64_t stride;
} MatI64Core;
static inline int64_t mat_get(MatI64Core *m, int64_t r, int64_t c) {
return m->ptr[r * m->stride + c];
}
static inline void mat_set(MatI64Core *m, int64_t r, int64_t c, int64_t v) {
m->ptr[r * m->stride + c] = v;
}
int main() {
int64_t n = N_PLACEHOLDER;
int64_t total = n * n;
MatI64Core A, B, C;
A.rows = B.rows = C.rows = n;
A.cols = B.cols = C.cols = n;
A.stride = B.stride = C.stride = n;
A.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
B.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
C.ptr = (int64_t*)malloc(sizeof(int64_t)*total);
for (int64_t idx = 0; idx < total; idx++) {
A.ptr[idx] = idx % 97;
B.ptr[idx] = (idx * 3) % 101;
C.ptr[idx] = 0;
}
for (int64_t i = 0; i < n; i++) {
for (int64_t k = 0; k < n; k++) {
int64_t aik = mat_get(&A, i, k);
for (int64_t j = 0; j < n; j++) {
int64_t idx = i * C.stride + j;
int64_t v = C.ptr[idx] + aik * mat_get(&B, k, j);
C.ptr[idx] = v;
}
}
}
int64_t r = mat_get(&C, n-1, n-1);
free(A.ptr); free(B.ptr); free(C.ptr);
return (int)(r & 0xFF);
}
C
# Inject the matrix size into the generated C source.
sed -i "s/N_PLACEHOLDER/${N}/" "$C_FILE"
;;
linidx)
# Linear index pattern: idx = i*cols + j
# Derive rows/cols from N to keep runtime stable