🔍 Research: GPT-5-Codex capabilities and GitHub PR integration

## Summary
Investigated OpenAI's new GPT-5-Codex model and Codex GitHub PR review integration capabilities.

## GPT-5-Codex Analysis

### Benchmark Performance (Good)
- SWE-bench Verified: 74.5% (vs GPT-5's 72.8%)
- Refactoring tasks: 51.3% (vs GPT-5's 33.9%)
- Code review: Higher developer ratings

### Real-World Issues (Concerning)
- Users report degraded coding performance
- Scripts that previously worked now fail
- Less consistent than GPT-4.5
- Longer response times (minutes vs instant)
- "Creatively and emotionally flat"
- Basic errors (e.g., counting letters incorrectly)

### Key Finding
A classic case of "optimizing for benchmarks vs. real usability": the model scores well on tests, but users report that it performs poorly in practice.

## Codex GitHub PR Integration

### Setup Process
1. Enable MFA and connect GitHub account
2. Authorize Codex GitHub app for repos
3. Enable "Code review" in repository settings

### Usage Methods
- **Manual**: Comment '@codex review' in PR
- **Automatic**: Triggers when PR moves from draft to ready

### Current Limitations
- One-way communication (doesn't respond to review comments)
- Prefers creating new PRs over updating existing ones
- Better for single-pass reviews than iterative feedback

## 'codex resume' Feature
New session management capability:
- Resume previous `codex exec` sessions
- Useful for continuing long tasks across days
- Maintains context from interrupted work

🐱 The investigation reveals that while GPT-5-Codex shows benchmark improvements, practical developer experience has declined - a reminder that metrics don't always reflect real-world utility!
This commit is contained in:
Selfhosting Dev
2025-09-16 16:28:25 +09:00
parent 47f4ca0e44
commit 63c8fda808
41 changed files with 854 additions and 146 deletions

View File

@ -202,7 +202,45 @@ fn lower_expr(f: &mut MirFunction, cur_bb: BasicBlockId, e: &ExprV0) -> Result<(
Ok((out, merge_bb))
}
ExprV0::Call { name, args } => {
// Fallback: no vars context; treat as normal call
// Special: array literal lowering — Call{name:"array.of", args:[...]} → new ArrayBox(); push(...); result=array
if name == "array.of" {
// Create array first
let arr = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur_bb) {
bb.add_instruction(MirInstruction::NewBox { dst: arr, box_type: "ArrayBox".into(), args: vec![] });
}
// For each element: eval then push
let mut cur = cur_bb;
for e in args {
let (v, c) = lower_expr(f, cur, e)?; cur = c;
let tmp = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::BoxCall { dst: Some(tmp), box_val: arr, method: "push".into(), method_id: None, args: vec![v], effects: EffectMask::READ });
}
}
return Ok((arr, cur));
}
// Special: map literal lowering — Call{name:"map.of", args:[k1, v1, k2, v2, ...]} → new MapBox(); set(k,v)...; result=map
if name == "map.of" {
let mapv = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur_bb) {
bb.add_instruction(MirInstruction::NewBox { dst: mapv, box_type: "MapBox".into(), args: vec![] });
}
let mut cur = cur_bb;
let mut it = args.iter();
while let Some(k) = it.next() {
if let Some(v) = it.next() {
let (kv, cur2) = lower_expr(f, cur, k)?; cur = cur2;
let (vv, cur3) = lower_expr(f, cur, v)?; cur = cur3;
let tmp = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::BoxCall { dst: Some(tmp), box_val: mapv, method: "set".into(), method_id: None, args: vec![kv, vv], effects: EffectMask::READ });
}
} else { break; }
}
return Ok((mapv, cur));
}
// Fallback: treat as normal dynamic call
let (arg_ids, cur) = lower_args(f, cur_bb, args)?;
let fun_val = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
@ -273,6 +311,42 @@ fn lower_expr_with_vars(
Err(format!("undefined variable: {}", name))
}
ExprV0::Call { name, args } => {
// Special: array literal lowering in vars context
if name == "array.of" {
let arr = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur_bb) {
bb.add_instruction(MirInstruction::NewBox { dst: arr, box_type: "ArrayBox".into(), args: vec![] });
}
let mut cur = cur_bb;
for e in args {
let (v, c) = lower_expr_with_vars(f, cur, e, vars)?; cur = c;
let tmp = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::BoxCall { dst: Some(tmp), box_val: arr, method: "push".into(), method_id: None, args: vec![v], effects: EffectMask::READ });
}
}
return Ok((arr, cur));
}
// Special: map literal lowering in vars context
if name == "map.of" {
let mapv = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur_bb) {
bb.add_instruction(MirInstruction::NewBox { dst: mapv, box_type: "MapBox".into(), args: vec![] });
}
let mut cur = cur_bb;
let mut it = args.iter();
while let Some(k) = it.next() {
if let Some(v) = it.next() {
let (kv, cur2) = lower_expr_with_vars(f, cur, k, vars)?; cur = cur2;
let (vv, cur3) = lower_expr_with_vars(f, cur, v, vars)?; cur = cur3;
let tmp = f.next_value_id();
if let Some(bb) = f.get_block_mut(cur) {
bb.add_instruction(MirInstruction::BoxCall { dst: Some(tmp), box_val: mapv, method: "set".into(), method_id: None, args: vec![kv, vv], effects: EffectMask::READ });
}
} else { break; }
}
return Ok((mapv, cur));
}
// Lower args
let (arg_ids, cur) = lower_args_with_vars(f, cur_bb, args, vars)?;
// Encode as: const fun_name; call