Files
hakorune/tools/ny_parser_mvp.py
Selfhosting Dev 3ba96d9a03 🚀 feat: Multiple improvements for Nyash parser and LLVM backend
Parser improvements:
- Added expression statement fallback in parse_statement() for flexible syntax
- Fixed ternary operator to use PeekExpr instead of If AST (better lowering)
- Added peek_token() check to avoid ?/?: operator conflicts

LLVM Python improvements:
- Added optional ESC_JSON_FIX environment flag for string concatenation
- Improved PHI generation with better default handling
- Enhanced substring tracking for esc_json pattern

Documentation updates:
- Updated language guide with peek expression examples
- Added box theory diagrams to Phase 15 planning
- Clarified peek vs when syntax differences

These changes enable cleaner parser implementation for self-hosting,
especially for handling digit conversion with peek expressions instead
of 19-line if-else chains.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-14 19:16:32 +09:00

95 lines
2.9 KiB
Python

#!/usr/bin/env python3
"""
Ny parser MVP (Stage 1): Ny -> JSON v0
Grammar (subset):
program := [return] expr EOF
expr := term (('+'|'-') term)*
term := factor (('*'|'/') factor)*
factor := INT | STRING | '(' expr ')'
Outputs JSON v0 compatible with --ny-parser-pipe.
"""
import sys, re, json
class Tok:
    """A single lexical token produced by ``lex``.

    Attributes:
        kind: Token category. ``lex`` emits the operator/parenthesis
            character itself (one of ``+ - * / ( )``), or ``'INT'``,
            ``'STR'``, ``'RETURN'``, ``'EOF'``.
        val: Payload of the token: an ``int`` for ``'INT'``, the unescaped
            text for ``'STR'``, the matched text for operators and
            ``'RETURN'``, and ``''`` for ``'EOF'``.
        pos: 0-based character offset of the token's first character in
            the source string (the source length for ``'EOF'``).
    """

    def __init__(self, kind, val, pos):
        self.kind, self.val, self.pos = kind, val, pos

    def __repr__(self):
        # Readable form for debugging token streams and parser errors.
        return f"Tok({self.kind!r}, {self.val!r}, {self.pos!r})"
def lex(s: str):
    """Split Ny source text into a list of ``Tok`` objects.

    Recognised tokens:
      * the single characters ``+ - * / ( )`` (kind is the character itself)
      * runs of decimal digits -> ``'INT'`` with an ``int`` value
      * double-quoted strings -> ``'STR'``; a backslash makes the following
        character literal (``\\"`` -> ``"``, ``\\n`` -> ``n``; no escape
        translation is performed)
      * the keyword ``return`` -> ``'RETURN'``
    Whitespace is skipped. The returned list always ends with an ``'EOF'``
    token whose position is ``len(s)``.

    Args:
        s: Source text to tokenize.

    Returns:
        List of ``Tok`` in source order, terminated by an ``'EOF'`` token.

    Raises:
        SyntaxError: On an unexpected character or an unterminated string
            literal.
    """
    n = len(s)
    out = []
    i = 0
    while i < n:
        c = s[i]
        if c.isspace():
            i += 1
            continue
        if c in '+-*/()':
            out.append(Tok(c, c, i))
            i += 1
            continue
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript digits that int() cannot convert, which would leak
        # a ValueError instead of a lexer SyntaxError.
        if c.isdecimal():
            j = i
            while j < n and s[j].isdecimal():
                j += 1
            out.append(Tok('INT', int(s[i:j]), i))
            i = j
            continue
        if c == '"':
            j = i + 1
            buf = []
            closed = False
            while j < n:
                ch = s[j]
                if ch == '\\' and j + 1 < n:
                    # Backslash: take the next character verbatim.
                    buf.append(s[j + 1])
                    j += 2
                    continue
                if ch == '"':
                    j += 1
                    closed = True
                    break
                buf.append(ch)
                j += 1
            if not closed:
                # Input ended before the closing quote; reject instead of
                # silently producing a truncated string token.
                raise SyntaxError(f"lex: unterminated string at {i}")
            out.append(Tok('STR', ''.join(buf), i))
            i = j
            continue
        # NOTE: plain prefix match with no word-boundary check, so
        # "return1" lexes as RETURN followed by INT 1.
        if s.startswith('return', i):
            out.append(Tok('RETURN', 'return', i))
            i += 6
            continue
        raise SyntaxError(f"lex: unexpected '{c}' at {i}")
    out.append(Tok('EOF', '', n))
    return out
class P:
    """Recursive-descent parser turning a token list into a JSON v0 AST.

    Grammar handled:
        program := [return] expr EOF
        expr    := term (('+'|'-') term)*
        term    := factor (('*'|'/') factor)*
        factor  := INT | STR | '(' expr ')'

    Tokens only need ``kind``, ``val`` and ``pos`` attributes, and the list
    must end with a token of kind ``'EOF'``.
    """

    def __init__(self, toks):
        self.t = toks  # token list
        self.i = 0     # index of the current (not yet consumed) token

    def cur(self):
        """Return the current token without consuming it."""
        return self.t[self.i]

    def eat(self, k):
        """Consume the current token if its kind is ``k``; report success."""
        if self.cur().kind == k:
            self.i += 1
            return True
        return False

    def expect(self, k):
        """Consume a token of kind ``k`` or raise ``SyntaxError``."""
        if not self.eat(k):
            raise SyntaxError(f"expect {k} at {self.cur().pos}")

    def factor(self):
        """Parse ``INT | STR | '(' expr ')'`` and return its AST node."""
        tok = self.cur()
        if self.eat('INT'):
            return {"type": "Int", "value": tok.val}
        if self.eat('STR'):
            return {"type": "Str", "value": tok.val}
        if self.eat('('):
            e = self.expr()
            self.expect(')')
            # Parentheses only group; they leave no node in the AST.
            return e
        raise SyntaxError(f"factor at {tok.pos}")

    def _binary_chain(self, operand, ops):
        """Parse ``operand (op operand)*`` into left-nested Binary nodes.

        ``operand`` is the bound parse method for the next-higher precedence
        level; ``ops`` is the tuple of token kinds accepted at this level.
        """
        lhs = operand()
        while self.cur().kind in ops:
            op = self.cur().kind
            self.i += 1
            rhs = operand()
            lhs = {"type": "Binary", "op": op, "lhs": lhs, "rhs": rhs}
        return lhs

    def term(self):
        """Parse a left-associative chain of ``*`` / ``/`` over factors."""
        return self._binary_chain(self.factor, ('*', '/'))

    def expr(self):
        """Parse a left-associative chain of ``+`` / ``-`` over terms."""
        return self._binary_chain(self.term, ('+', '-'))

    def program(self):
        """Parse a whole program and return the JSON v0 ``Program`` dict.

        The leading ``return`` keyword is optional; with or without it the
        single expression is wrapped in one ``Return`` statement.

        Raises:
            SyntaxError: If the expression is malformed or tokens remain
                before ``EOF``.
        """
        self.eat('RETURN')  # optional keyword; result is the same either way
        e = self.expr()
        self.expect('EOF')
        return {"version": 0, "kind": "Program", "body": [{"type": "Return", "expr": e}]}
def main():
    """Command-line entry point: parse one Ny source file to JSON v0.

    Reads the file named by the first command-line argument as UTF-8,
    tokenizes and parses it, and prints the resulting program as a single
    JSON line on stdout. With no argument, prints a usage line to stderr
    and exits with status 1.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("usage: ny_parser_mvp.py <file.nyash>", file=sys.stderr)
        sys.exit(1)

    source_path = argv[1]
    with open(source_path, 'r', encoding='utf-8') as handle:
        source_text = handle.read()

    tokens = lex(source_text)
    parser = P(tokens)
    program_ast = parser.program()
    # ensure_ascii=False keeps non-ASCII string contents as-is in the output.
    print(json.dumps(program_ast, ensure_ascii=False))


if __name__ == '__main__':
    main()