Implement JSON v0 Bridge with full PHI support for If/Loop statements

Major implementation by ChatGPT:
- Complete JSON v0 Bridge layer with PHI generation for control flow
- If statement: Merge PHI nodes for variables updated in then/else branches
- Loop statement: Header PHI nodes for loop-carried dependencies
- Python MVP Parser Stage-2: Added local/if/loop/call/method/new support
- Full CFG guarantee: All blocks have proper terminators (branch/jump/return)
- Type metadata for string operations (+, ==, !=)
- Comprehensive PHI smoke tests for nested and edge cases

This allows MIR generation without the Rust MIR builder - a massive step towards
eliminating the Rust build dependency!

🎉 ChatGPTが30分以上かけて実装してくれたにゃ!

Co-Authored-By: ChatGPT <noreply@openai.com>
This commit is contained in:
Selfhosting Dev
2025-09-14 23:22:05 +09:00
parent 5cad0ab20c
commit d01f9b9c93
11 changed files with 725 additions and 81 deletions

View File

@ -1,12 +1,25 @@
#!/usr/bin/env python3
"""
Ny parser MVP (Stage 1): Ny -> JSON v0
Ny parser MVP (Stage 2): Ny -> JSON v0
Grammar (subset):
program := [return] expr EOF
expr := term (('+'|'-') term)*
program := stmt* EOF
stmt := 'return' expr
| 'local' IDENT '=' expr
| 'if' expr block ('else' block)?
| 'loop' '(' expr ')' block
| expr # expression statement
block := '{' stmt* '}'
expr := logic
logic := compare (('&&'|'||') compare)*
compare := sum (('=='|'!='|'<'|'>'|'<='|'>=') sum)?
sum := term (('+'|'-') term)*
term := factor (('*'|'/') factor)*
factor := INT | STRING | '(' expr ')'
factor := INT | STRING | IDENT call_tail* | '(' expr ')' | 'new' IDENT '(' args? ')'
call_tail:= '.' IDENT '(' args? ')' # method
| '(' args? ')' # function call
args := expr (',' expr)*
Outputs JSON v0 compatible with --ny-parser-pipe.
"""
@ -16,30 +29,44 @@ class Tok:
def __init__(self, kind, val, pos):
    """Store the token's kind tag, its value, and its position."""
    self.kind = kind
    self.val = val
    self.pos = pos
# Reserved words mapped to the token kind emitted for them; each kind is
# simply the upper-cased spelling of the word.
KEYWORDS = {
    word: word.upper()
    for word in ('return', 'local', 'if', 'else', 'loop', 'new')
}
def lex(s: str):
    """Split Ny source text into a list of Tok objects terminated by an EOF token.

    Token kinds produced:
      * 'OP2'   -- two-character operators (==, !=, <=, >=, &&, ||); val is the operator
      * single-character punctuation/operators; kind and val are the character itself
      * 'STR'   -- double-quoted string; val is the contents without the quotes
      * 'INT'   -- run of digits; val is the parsed int
      * keyword kinds taken from KEYWORDS, or 'IDENT' for any other identifier
      * 'EOF'   -- always appended last, positioned at len(s)

    Each token's pos is the index in ``s`` where the token starts.

    Raises:
        SyntaxError: on a character that starts no token, or on a string
            literal that is still open when the input ends.
    """
    i = 0
    n = len(s)
    out = []
    while i < n:
        c = s[i]
        if c.isspace():
            i += 1
            continue
        # Two-character operators are tried before the single-character table,
        # otherwise '==' would come out as two '=' tokens.
        if s.startswith(('==', '!=', '<=', '>=', '&&', '||'), i):
            out.append(Tok('OP2', s[i:i + 2], i))
            i += 2
            continue
        if c in '+-*/(){}.,<>=':
            out.append(Tok(c, c, i))
            i += 1
            continue
        if c == '"':
            j = i + 1
            buf = []
            closed = False
            while j < n:
                if s[j] == '\\' and j + 1 < n:
                    # A backslash copies the following character verbatim
                    # (so \" yields a quote and \n yields the letter n).
                    buf.append(s[j + 1])
                    j += 2
                    continue
                if s[j] == '"':
                    j += 1
                    closed = True
                    break
                buf.append(s[j])
                j += 1
            if not closed:
                # Running off the end of the input used to be accepted
                # silently; report it instead of emitting a truncated STR.
                raise SyntaxError(f"lex: unterminated string at {i}")
            out.append(Tok('STR', ''.join(buf), i))
            i = j
            continue
        if c.isdigit():
            j = i
            while j < n and s[j].isdigit():
                j += 1
            out.append(Tok('INT', int(s[i:j]), i))
            i = j
            continue
        if c.isalpha() or c == '_':
            j = i
            while j < n and (s[j].isalnum() or s[j] == '_'):
                j += 1
            ident = s[i:j]
            # Keywords are recognised only as whole identifiers, so a name
            # such as 'returned' stays a single IDENT.
            out.append(Tok(KEYWORDS.get(ident, 'IDENT'), ident, i))
            i = j
            continue
        raise SyntaxError(f"lex: unexpected '{c}' at {i}")
    out.append(Tok('EOF', '', n))
    return out
@ -52,34 +79,91 @@ class P:
return False
def expect(self, k):
    """Consume a token of kind ``k``; raise SyntaxError if the current token differs."""
    if self.eat(k):
        return
    raise SyntaxError(f"expect {k} at {self.cur().pos}")
def program(self):
    """Parse statements until EOF and wrap them in the JSON v0 Program node."""
    statements = []
    while True:
        if self.cur().kind == 'EOF':
            break
        statements.append(self.stmt())
    return {"version": 0, "kind": "Program", "body": statements}
def stmt(self):
    """Parse one statement and return its JSON v0 node.

    Handles ``return``, ``local NAME = expr``, ``if`` with an optional
    ``else``, and ``loop (cond) { ... }``; anything else is parsed as an
    expression statement.
    """
    if self.eat('RETURN'):
        return {"type": "Return", "expr": self.expr()}

    if self.eat('LOCAL'):
        # Remember the token before expect() advances past it.
        name_tok = self.cur()
        self.expect('IDENT')
        self.expect('=')
        return {"type": "Local", "name": name_tok.val, "expr": self.expr()}

    if self.eat('IF'):
        condition = self.expr()
        then_body = self.block()
        # "else" stays None when no else branch is present.
        else_body = self.block() if self.eat('ELSE') else None
        return {"type": "If", "cond": condition, "then": then_body, "else": else_body}

    if self.eat('LOOP'):
        self.expect('(')
        condition = self.expr()
        self.expect(')')
        return {"type": "Loop", "cond": condition, "body": self.block()}

    return {"type": "Expr", "expr": self.expr()}
def block(self):
    """Parse ``'{' stmt* '}'`` and return the list of statement nodes.

    Raises:
        SyntaxError: if the opening ``{`` is missing, or if the input ends
            before the closing ``}`` (reported as ``expect } at <pos>``).
    """
    self.expect('{')
    body = []
    # Stop at EOF as well as '}' so an unclosed block is reported by the
    # expect() below as a missing brace, rather than relying on a nested
    # parse to fail on the EOF token with an unrelated message.
    while self.cur().kind not in ('}', 'EOF'):
        body.append(self.stmt())
    self.expect('}')
    return body
def expr(self):
    """Parse an expression; the entry point is the lowest-precedence rule, ``logic``."""
    node = self.logic()
    return node
def logic(self):
    """Parse ``compare (('&&'|'||') compare)*`` into left-nested Logical nodes."""
    node = self.compare()
    while True:
        tok = self.cur()
        if tok.kind != 'OP2' or tok.val not in ('&&', '||'):
            return node
        self.i += 1  # consume the operator token
        node = {"type": "Logical", "op": tok.val, "lhs": node, "rhs": self.compare()}
def compare(self):
    """Parse ``sum (cmp_op sum)?`` where cmp_op is ==, !=, <, >, <= or >=.

    At most one comparison operator is consumed; comparisons do not chain.
    """
    left = self.sum()
    tok = self.cur()
    if tok.kind == 'OP2':
        # Two-character operators carry their spelling in val.
        op = getattr(tok, 'val', None)
        if op not in ('==', '!=', '<=', '>='):
            return left
    elif tok.kind in ('<', '>'):
        # Single-character operators use the character itself as the kind.
        op = tok.kind
    else:
        return left
    self.i += 1  # consume the operator token
    return {"type": "Compare", "op": op, "lhs": left, "rhs": self.sum()}
def sum(self):
    """Parse ``term (('+'|'-') term)*`` into left-nested Binary nodes."""
    node = self.term()
    while True:
        op = self.cur().kind
        if op not in ('+', '-'):
            return node
        self.i += 1  # consume the operator token
        node = {"type": "Binary", "op": op, "lhs": node, "rhs": self.term()}
def term(self):
    """Parse ``factor (('*'|'/') factor)*`` into left-nested Binary nodes."""
    node = self.factor()
    while True:
        op = self.cur().kind
        if op not in ('*', '/'):
            return node
        self.i += 1  # consume the operator token
        node = {"type": "Binary", "op": op, "lhs": node, "rhs": self.factor()}
def factor(self):
    """Parse a primary expression and return its JSON v0 node.

    Accepts an INT or STR literal, a parenthesised expression,
    ``new IDENT(args?)``, or an identifier optionally followed by one
    direct call ``(args?)`` and any number of ``.method(args?)`` tails.

    Raises:
        SyntaxError: if no alternative matches the current token, or if a
            call ``(`` is applied to something other than a bare identifier
            (e.g. ``f(1)(2)`` or ``a.b()(1)``). The Call node carries only
            a callee name, so such a call cannot be represented.
    """
    tok = self.cur()
    if self.eat('INT'):
        return {"type": "Int", "value": tok.val}
    if self.eat('STR'):
        return {"type": "Str", "value": tok.val}
    if self.eat('('):
        inner = self.expr()
        self.expect(')')
        return inner
    if self.eat('NEW'):
        # Remember the class-name token before expect() advances past it.
        class_tok = self.cur()
        self.expect('IDENT')
        self.expect('(')
        args = self.args_opt()
        self.expect(')')
        return {"type": "New", "class": class_tok.val, "args": args}
    if self.eat('IDENT'):
        node = {"type": "Var", "name": tok.val}
        # Call / method tails.
        while True:
            tail = self.cur()
            if self.eat('('):
                if node["type"] != "Var":
                    # Rebuilding a Call from the leading identifier here
                    # would silently discard the node parsed so far.
                    raise SyntaxError(f"call on non-identifier at {tail.pos}")
                args = self.args_opt()
                self.expect(')')
                node = {"type": "Call", "name": node["name"], "args": args}
            elif self.eat('.'):
                method_tok = self.cur()
                self.expect('IDENT')
                self.expect('(')
                args = self.args_opt()
                self.expect(')')
                node = {"type": "Method", "recv": node, "method": method_tok.val, "args": args}
            else:
                break
        return node
    raise SyntaxError(f"factor at {tok.pos}")
def args_opt(self):
    """Parse an optional comma-separated argument list.

    Returns the list of parsed argument expressions; the list is empty when
    the current token is already ``)``. The closing ``)`` itself is left for
    the caller to consume.
    """
    if self.cur().kind == ')':
        return []
    args = [self.expr()]
    while self.eat(','):
        args.append(self.expr())
    return args
def main():
if len(sys.argv)<2: