From 8e3bd1d70c17d6b550eddb2164b4ebf0153242fc Mon Sep 17 00:00:00 2001 From: dsarno Date: Wed, 24 Dec 2025 13:12:26 -0800 Subject: [PATCH] Fix/ci cleanup (#484) * CI: tighten NL/T reporting and fail job on missing/failed tests * CI: include test titles in summary checklist --- .claude/prompts/nl-unity-suite-nl.md | 6 +- .github/workflows/claude-nl-suite.yml | 112 +++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 7 deletions(-) diff --git a/.claude/prompts/nl-unity-suite-nl.md b/.claude/prompts/nl-unity-suite-nl.md index 50ab1e0..18c2ecf 100644 --- a/.claude/prompts/nl-unity-suite-nl.md +++ b/.claude/prompts/nl-unity-suite-nl.md @@ -18,15 +18,15 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un - Each file must contain EXACTLY one `<testcase>` root element - NO prologue, epilogue, code fences, or extra characters - NO markdown formatting or explanations outside the XML -- Use this exact format: +- Use this exact shape (write the XML directly into the file; do not wrap it in ``` fences): -```xml -``` + +- Must end with the closing tag `</testcase>` (well‑formed XML only). 
- If test fails, include: `<failure message="reason"/>` - TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4 diff --git a/.github/workflows/claude-nl-suite.yml b/.github/workflows/claude-nl-suite.yml index 54faaa9..8dee76c 100644 --- a/.github/workflows/claude-nl-suite.yml +++ b/.github/workflows/claude-nl-suite.yml @@ -858,9 +858,11 @@ jobs: from pathlib import Path import xml.etree.ElementTree as ET import re + import shutil DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"] seen = set() + bad = set() def id_from_filename(p: Path): n = p.name m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I) @@ -872,12 +874,23 @@ jobs: return None for p in Path("reports").glob("*_results.xml"): + fid = id_from_filename(p) try: r = ET.parse(p).getroot() except Exception: + # If the file exists but isn't parseable, preserve it for debugging and + # treat it as a failing (malformed) fragment rather than "not produced". + if fid in DESIRED and p.exists() and p.stat().st_size > 0: + staging = Path("reports/_staging") + staging.mkdir(parents=True, exist_ok=True) + preserved = staging / f"{fid}_malformed.xml" + try: + shutil.copyfile(p, preserved) + except Exception: + pass + bad.add(fid) continue # Count by filename id primarily; fall back to testcase name if needed - fid = id_from_filename(p) if fid in DESIRED: seen.add(fid) continue @@ -894,8 +907,12 @@ jobs: continue frag = Path(f"reports/{d}_results.xml") tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d}) - fail = ET.SubElement(tc, "failure", {"message":"not produced"}) - fail.text = "The agent did not emit a fragment for this test." + if d in bad: + fail = ET.SubElement(tc, "failure", {"message":"malformed xml"}) + fail.text = "The agent wrote a fragment file, but it was not valid XML (parse failed). See reports/_staging/*_malformed.xml for the preserved original." 
+ else: + fail = ET.SubElement(tc, "failure", {"message":"not produced"}) + fail.text = "The agent did not emit a fragment for this test." ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False) print(f"backfill: {d}") PY @@ -1064,6 +1081,31 @@ jobs: name_map[tid] = (tc.get('name') or tid) desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J'] + default_titles = { + 'NL-0': 'Baseline State Capture', + 'NL-1': 'Core Method Operations', + 'NL-2': 'Anchor Comment Insertion', + 'NL-3': 'End-of-Class Content', + 'NL-4': 'Console State Verification', + 'T-A': 'Temporary Helper', + 'T-B': 'Method Body Interior', + 'T-C': 'Different Method Interior', + 'T-D': 'End-of-Class Helper', + 'T-E': 'Method Evolution', + 'T-F': 'Atomic Multi-Edit', + 'T-G': 'Path Normalization', + 'T-H': 'Validation on Modified', + 'T-I': 'Failure Surface', + 'T-J': 'Idempotency', + } + + def display_name(test_id: str) -> str: + # Prefer the emitted testcase "name" attribute (it may already include ID + title). 
+ n = (name_map.get(test_id) or '').strip() + if n: + return n + t = (default_titles.get(test_id) or '').strip() + return f"{test_id} — {t}" if t else test_id total = len(cases) failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None)) @@ -1079,7 +1121,8 @@ jobs: ] for p in desired: st = id_status.get(p, None) - lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)")) + label = display_name(p) + lines.append(f"- [x] {label}" if st is True else (f"- [ ] {label} (fail)" if st is False else f"- [ ] {label} (not run)")) lines.append('') lines.append('## Test Details') @@ -1136,6 +1179,67 @@ jobs: md_out.write_text('\n'.join(lines), encoding='utf-8') PY + # ---------- CI gate: fail job if any NL/T test missing or failed ---------- + - name: Fail CI if NL/T incomplete or failed + if: always() + shell: bash + run: | + python3 - <<'PY' + import os, re, sys + from pathlib import Path + import xml.etree.ElementTree as ET + + desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J'] + + junit_path = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml')) + if not junit_path.exists(): + print("::error::No JUnit output found; failing CI gate.") + sys.exit(1) + + def localname(tag: str) -> str: + return tag.rsplit('}', 1)[-1] if '}' in tag else tag + + tree = ET.parse(junit_path) + root = tree.getroot() + suite = root.find('./*') if localname(root.tag) == 'testsuites' else root + cases = [] if suite is None else list(suite.findall('.//testcase')) + + def id_from_case(tc): + name = (tc.get('name') or '').strip() + m = re.match(r'(NL-\d+|T-[A-Z])\b', name) + if m: + return m.group(1) + so = tc.find('system-out') + if so is not None and so.text: + m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text) + if m: + return m.group(1) + return None + + # Determine status per desired ID (first occurrence wins, matching the summary builder) + 
id_status = {} + for tc in cases: + tid = id_from_case(tc) + if not tid or tid not in desired or tid in id_status: + continue + ok = (tc.find('failure') is None and tc.find('error') is None) + id_status[tid] = ok + + missing = [d for d in desired if d not in id_status] + failed = [d for d, ok in id_status.items() if ok is False] + + if missing: + print(f"::error::Missing NL/T tests in JUnit: {' '.join(missing)}") + if failed: + print(f"::error::Failing NL/T tests in JUnit: {' '.join(sorted(failed))}") + + # Gate: all desired must be present and passing + if missing or failed: + sys.exit(1) + + print("NL/T CI gate passed: all required tests present and passing.") + PY + # ---------- Collect execution transcript (if present) ---------- - name: Collect action execution transcript if: always()