Fix/ci cleanup (#484)

* CI: tighten NL/T reporting and fail job on missing/failed tests

* CI: include test titles in summary checklist
main
dsarno 2025-12-24 13:12:26 -08:00 committed by GitHub
parent 91b6f4d8d6
commit 8e3bd1d70c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 111 additions and 7 deletions

View File

@ -18,15 +18,15 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un
- Each file must contain EXACTLY one `<testcase>` root element - Each file must contain EXACTLY one `<testcase>` root element
- NO prologue, epilogue, code fences, or extra characters - NO prologue, epilogue, code fences, or extra characters
- NO markdown formatting or explanations outside the XML - NO markdown formatting or explanations outside the XML
- Use this exact format: - Use this exact shape (write the XML directly into the file; do not wrap it in ``` fences):
```xml
<testcase name="NL-0 — Baseline State Capture" classname="UnityMCP.NL-T"> <testcase name="NL-0 — Baseline State Capture" classname="UnityMCP.NL-T">
<system-out><![CDATA[ <system-out><![CDATA[
(evidence of what was accomplished) (evidence of what was accomplished)
]]></system-out> ]]></system-out>
</testcase> </testcase>
```
- Must end with the closing tag `</testcase>` (well-formed XML only).
- If test fails, include: `<failure message="reason"/>` - If test fails, include: `<failure message="reason"/>`
- TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4 - TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4

View File

@ -858,9 +858,11 @@ jobs:
from pathlib import Path from pathlib import Path
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import re import re
import shutil
DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"] DESIRED = ["NL-0","NL-1","NL-2","NL-3","NL-4","T-A","T-B","T-C","T-D","T-E","T-F","T-G","T-H","T-I","T-J"]
seen = set() seen = set()
bad = set()
def id_from_filename(p: Path): def id_from_filename(p: Path):
n = p.name n = p.name
m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I) m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
@ -872,12 +874,23 @@ jobs:
return None return None
for p in Path("reports").glob("*_results.xml"): for p in Path("reports").glob("*_results.xml"):
fid = id_from_filename(p)
try: try:
r = ET.parse(p).getroot() r = ET.parse(p).getroot()
except Exception: except Exception:
# If the file exists but isn't parseable, preserve it for debugging and
# treat it as a failing (malformed) fragment rather than "not produced".
if fid in DESIRED and p.exists() and p.stat().st_size > 0:
staging = Path("reports/_staging")
staging.mkdir(parents=True, exist_ok=True)
preserved = staging / f"{fid}_malformed.xml"
try:
shutil.copyfile(p, preserved)
except Exception:
pass
bad.add(fid)
continue continue
# Count by filename id primarily; fall back to testcase name if needed # Count by filename id primarily; fall back to testcase name if needed
fid = id_from_filename(p)
if fid in DESIRED: if fid in DESIRED:
seen.add(fid) seen.add(fid)
continue continue
@ -894,8 +907,12 @@ jobs:
continue continue
frag = Path(f"reports/{d}_results.xml") frag = Path(f"reports/{d}_results.xml")
tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d}) tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d})
fail = ET.SubElement(tc, "failure", {"message":"not produced"}) if d in bad:
fail.text = "The agent did not emit a fragment for this test." fail = ET.SubElement(tc, "failure", {"message":"malformed xml"})
fail.text = "The agent wrote a fragment file, but it was not valid XML (parse failed). See reports/_staging/*_malformed.xml for the preserved original."
else:
fail = ET.SubElement(tc, "failure", {"message":"not produced"})
fail.text = "The agent did not emit a fragment for this test."
ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False) ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False)
print(f"backfill: {d}") print(f"backfill: {d}")
PY PY
@ -1064,6 +1081,31 @@ jobs:
name_map[tid] = (tc.get('name') or tid) name_map[tid] = (tc.get('name') or tid)
desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J'] desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J']
# Fallback titles keyed by test ID. display_name() consults this table only
# when name_map holds no non-blank name for the ID.
default_titles = {
'NL-0': 'Baseline State Capture',
'NL-1': 'Core Method Operations',
'NL-2': 'Anchor Comment Insertion',
'NL-3': 'End-of-Class Content',
'NL-4': 'Console State Verification',
'T-A': 'Temporary Helper',
'T-B': 'Method Body Interior',
'T-C': 'Different Method Interior',
'T-D': 'End-of-Class Helper',
'T-E': 'Method Evolution',
'T-F': 'Atomic Multi-Edit',
'T-G': 'Path Normalization',
'T-H': 'Validation on Modified',
'T-I': 'Failure Surface',
'T-J': 'Idempotency',
}
def display_name(test_id: str) -> str:
    # Build the checklist label for one test ID.
    # 1) A non-blank name from name_map wins as-is (it may already include ID + title).
    # 2) Otherwise use "<id> — <default title>" when default_titles has a non-blank entry.
    # 3) Otherwise return the bare ID.
    emitted = (name_map.get(test_id) or '').strip()
    if emitted:
        return emitted
    title = (default_titles.get(test_id) or '').strip()
    if not title:
        return test_id
    return f"{test_id} — {title}"
total = len(cases) total = len(cases)
failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None)) failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None))
@ -1079,7 +1121,8 @@ jobs:
] ]
for p in desired: for p in desired:
st = id_status.get(p, None) st = id_status.get(p, None)
lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)")) label = display_name(p)
lines.append(f"- [x] {label}" if st is True else (f"- [ ] {label} (fail)" if st is False else f"- [ ] {label} (not run)"))
lines.append('') lines.append('')
lines.append('## Test Details') lines.append('## Test Details')
@ -1136,6 +1179,67 @@ jobs:
md_out.write_text('\n'.join(lines), encoding='utf-8') md_out.write_text('\n'.join(lines), encoding='utf-8')
PY PY
# ---------- CI gate: fail job if any NL/T test missing or failed ----------
- name: Fail CI if NL/T incomplete or failed
if: always()
shell: bash
run: |
python3 - <<'PY'
import os, re, sys
from pathlib import Path
import xml.etree.ElementTree as ET
# Every ID listed here must be present and passing for the gate to succeed.
desired = ['NL-0','NL-1','NL-2','NL-3','NL-4','T-A','T-B','T-C','T-D','T-E','T-F','T-G','T-H','T-I','T-J']

# JUNIT_OUT, when set, overrides the default report location.
junit_location = os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml')
junit_path = Path(junit_location)

# Without a report there is nothing to verify, so the gate fails immediately.
if not junit_path.exists():
    print("::error::No JUnit output found; failing CI gate.")
    sys.exit(1)
def localname(tag: str) -> str:
    # Return the tag without any leading "{namespace}" qualifier.
    # Tags that contain no closing brace are returned unchanged.
    if '}' not in tag:
        return tag
    _, _, bare = tag.rpartition('}')
    return bare
# Parse the JUnit report. A file that exists but is not well-formed XML is
# surfaced as a workflow error annotation (and a failing exit code) rather than
# an unhandled traceback, consistent with the other gate failures.
try:
    tree = ET.parse(junit_path)
except ET.ParseError as exc:
    print(f"::error::JUnit output at {junit_path} is not valid XML ({exc}); failing CI gate.")
    sys.exit(1)
root = tree.getroot()
# With a <testsuites> wrapper only its first child element is used as the suite;
# any other root element is treated as the suite itself.
suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
# An empty wrapper yields no suite, hence no cases (every desired ID is then missing).
cases = [] if suite is None else list(suite.findall('.//testcase'))
def id_from_case(tc):
    # Resolve the NL-<n> / T-<letter> test ID for a testcase element.
    # The name attribute is tried first and must start with the ID (after
    # trimming whitespace). Failing that, the first ID mentioned anywhere in
    # the system-out text is used. Returns None when neither yields an ID.
    title = (tc.get('name') or '').strip()
    leading = re.match(r'(NL-\d+|T-[A-Z])\b', title)
    if leading:
        return leading.group(1)
    captured = tc.find('system-out')
    # Compare against None explicitly: a childless Element is falsy.
    body = captured.text if captured is not None else None
    if not body:
        return None
    mentioned = re.search(r'\b(NL-\d+|T-[A-Z])\b', body)
    return mentioned.group(1) if mentioned else None
# Record pass/fail for each desired ID. The first testcase seen for an ID
# decides its status (first occurrence wins, matching the summary builder).
id_status = {}
for tc in cases:
    tid = id_from_case(tc)
    if tid in desired and tid not in id_status:
        # A testcase passes only when it has neither a failure nor an error child.
        id_status[tid] = tc.find('failure') is None and tc.find('error') is None

missing = [d for d in desired if d not in id_status]
failed = [d for d in id_status if id_status[d] is False]

if missing:
    print(f"::error::Missing NL/T tests in JUnit: {' '.join(missing)}")
if failed:
    print(f"::error::Failing NL/T tests in JUnit: {' '.join(sorted(failed))}")

# Gate: all desired must be present and passing
if missing or failed:
    sys.exit(1)

print("NL/T CI gate passed: all required tests present and passing.")
PY
# ---------- Collect execution transcript (if present) ---------- # ---------- Collect execution transcript (if present) ----------
- name: Collect action execution transcript - name: Collect action execution transcript
if: always() if: always()