Fix/ci cleanup (#484)
* CI: tighten NL/T reporting and fail job on missing/failed tests
* CI: include test titles in summary checklist
main
parent
91b6f4d8d6
commit
8e3bd1d70c
|
|
@ -18,15 +18,15 @@ AllowedTools: Write,mcp__unity__manage_editor,mcp__unity__list_resources,mcp__un
|
|||
- Each file must contain EXACTLY one `<testcase>` root element
|
||||
- NO prologue, epilogue, code fences, or extra characters
|
||||
- NO markdown formatting or explanations outside the XML
|
||||
- Use this exact format:
|
||||
- Use this exact shape (write the XML directly into the file; do not wrap it in ``` fences):
|
||||
|
||||
```xml
|
||||
<testcase name="NL-0 — Baseline State Capture" classname="UnityMCP.NL-T">
|
||||
<system-out><![CDATA[
|
||||
(evidence of what was accomplished)
|
||||
]]></system-out>
|
||||
</testcase>
|
||||
```
|
||||
|
||||
- Must end with the closing tag `</testcase>` (well‑formed XML only).
|
||||
|
||||
- If the test fails, include: `<failure message="reason"/>`
|
||||
- TESTID must be one of: NL-0, NL-1, NL-2, NL-3, NL-4
|
||||
|
|
|
|||
|
|
@ -858,9 +858,11 @@ jobs:
|
|||
from pathlib import Path
|
||||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
import shutil
|
||||
|
||||
# Ordered list of the NL/T test IDs this script recognises as required.
DESIRED = [
    "NL-0", "NL-1", "NL-2", "NL-3", "NL-4",
    "T-A", "T-B", "T-C", "T-D", "T-E", "T-F", "T-G", "T-H", "T-I", "T-J",
]
# IDs whose fragment file was found and parsed successfully.
seen = set()
# IDs whose fragment file exists but could not be parsed as XML.
bad = set()
|
||||
def id_from_filename(p: Path):
|
||||
n = p.name
|
||||
m = re.match(r'NL-?(\d+)_results\.xml$', n, re.I)
|
||||
|
|
@ -872,12 +874,23 @@ jobs:
|
|||
return None
|
||||
|
||||
for p in Path("reports").glob("*_results.xml"):
|
||||
fid = id_from_filename(p)
|
||||
try:
|
||||
r = ET.parse(p).getroot()
|
||||
except Exception:
|
||||
# If the file exists but isn't parseable, preserve it for debugging and
|
||||
# treat it as a failing (malformed) fragment rather than "not produced".
|
||||
if fid in DESIRED and p.exists() and p.stat().st_size > 0:
|
||||
staging = Path("reports/_staging")
|
||||
staging.mkdir(parents=True, exist_ok=True)
|
||||
preserved = staging / f"{fid}_malformed.xml"
|
||||
try:
|
||||
shutil.copyfile(p, preserved)
|
||||
except Exception:
|
||||
pass
|
||||
bad.add(fid)
|
||||
continue
|
||||
# Count by filename id primarily; fall back to testcase name if needed
|
||||
fid = id_from_filename(p)
|
||||
if fid in DESIRED:
|
||||
seen.add(fid)
|
||||
continue
|
||||
|
|
@ -894,8 +907,12 @@ jobs:
|
|||
continue
|
||||
frag = Path(f"reports/{d}_results.xml")
|
||||
tc = ET.Element("testcase", {"classname":"UnityMCP.NL-T", "name": d})
|
||||
fail = ET.SubElement(tc, "failure", {"message":"not produced"})
|
||||
fail.text = "The agent did not emit a fragment for this test."
|
||||
if d in bad:
|
||||
fail = ET.SubElement(tc, "failure", {"message":"malformed xml"})
|
||||
fail.text = "The agent wrote a fragment file, but it was not valid XML (parse failed). See reports/_staging/*_malformed.xml for the preserved original."
|
||||
else:
|
||||
fail = ET.SubElement(tc, "failure", {"message":"not produced"})
|
||||
fail.text = "The agent did not emit a fragment for this test."
|
||||
ET.ElementTree(tc).write(frag, encoding="utf-8", xml_declaration=False)
|
||||
print(f"backfill: {d}")
|
||||
PY
|
||||
|
|
@ -1064,6 +1081,31 @@ jobs:
|
|||
name_map[tid] = (tc.get('name') or tid)
|
||||
|
||||
# Ordered test IDs that the summary checklist walks through.
desired = [
    'NL-0', 'NL-1', 'NL-2', 'NL-3', 'NL-4',
    'T-A', 'T-B', 'T-C', 'T-D', 'T-E', 'T-F', 'T-G', 'T-H', 'T-I', 'T-J',
]
# Fallback titles, keyed by test ID, for tests with no recorded name.
default_titles = {
    'NL-0': 'Baseline State Capture',
    'NL-1': 'Core Method Operations',
    'NL-2': 'Anchor Comment Insertion',
    'NL-3': 'End-of-Class Content',
    'NL-4': 'Console State Verification',
    'T-A': 'Temporary Helper',
    'T-B': 'Method Body Interior',
    'T-C': 'Different Method Interior',
    'T-D': 'End-of-Class Helper',
    'T-E': 'Method Evolution',
    'T-F': 'Atomic Multi-Edit',
    'T-G': 'Path Normalization',
    'T-H': 'Validation on Modified',
    'T-I': 'Failure Surface',
    'T-J': 'Idempotency',
}
|
||||
|
||||
def display_name(test_id: str) -> str:
    """Return the checklist label for *test_id*.

    Prefers the emitted testcase "name" attribute stored in ``name_map``
    (it may already include ID + title). When that is missing or blank,
    falls back to "<id> — <default title>", or to the bare ID when
    ``default_titles`` has no usable entry either.
    """
    n = (name_map.get(test_id) or '').strip()
    if n:
        return n
    t = (default_titles.get(test_id) or '').strip()
    return f"{test_id} — {t}" if t else test_id
|
||||
|
||||
total = len(cases)
|
||||
failures = sum(1 for tc in cases if (tc.find('failure') is not None or tc.find('error') is not None))
|
||||
|
|
@ -1079,7 +1121,8 @@ jobs:
|
|||
]
|
||||
for p in desired:
|
||||
st = id_status.get(p, None)
|
||||
lines.append(f"- [x] {p}" if st is True else (f"- [ ] {p} (fail)" if st is False else f"- [ ] {p} (not run)"))
|
||||
label = display_name(p)
|
||||
lines.append(f"- [x] {label}" if st is True else (f"- [ ] {label} (fail)" if st is False else f"- [ ] {label} (not run)"))
|
||||
lines.append('')
|
||||
|
||||
lines.append('## Test Details')
|
||||
|
|
@ -1136,6 +1179,67 @@ jobs:
|
|||
md_out.write_text('\n'.join(lines), encoding='utf-8')
|
||||
PY
|
||||
|
||||
# ---------- CI gate: fail job if any NL/T test missing or failed ----------
|
||||
- name: Fail CI if NL/T incomplete or failed
|
||||
if: always()
|
||||
shell: bash
|
||||
run: |
|
||||
python3 - <<'PY'
|
||||
import os, re, sys
|
||||
from pathlib import Path
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# Every NL/T test ID that must be present and passing for the gate to succeed.
desired = [
    'NL-0', 'NL-1', 'NL-2', 'NL-3', 'NL-4',
    'T-A', 'T-B', 'T-C', 'T-D', 'T-E', 'T-F', 'T-G', 'T-H', 'T-I', 'T-J',
]

# Location of the JUnit report; the JUNIT_OUT environment variable overrides the default.
junit_path = Path(os.environ.get('JUNIT_OUT', 'reports/junit-nl-suite.xml'))
if not junit_path.exists():
    # No report at all is treated as a gate failure.
    print("::error::No JUnit output found; failing CI gate.")
    sys.exit(1)
|
||||
|
||||
def localname(tag: str) -> str:
    """Return *tag* with any ``{namespace}`` prefix removed.

    Everything up to and including the last ``}`` is dropped; a tag without
    ``}`` is returned unchanged.
    """
    return tag.rsplit('}', 1)[-1] if '}' in tag else tag
|
||||
|
||||
# Parse the JUnit report. A malformed file is reported as a workflow error
# annotation, like the missing-file case, instead of a raw traceback.
try:
    tree = ET.parse(junit_path)
except ET.ParseError as exc:
    print(f"::error::JUnit output is not valid XML ({exc}); failing CI gate.")
    sys.exit(1)
root = tree.getroot()
# When the root is <testsuites>, use its first child element as the suite;
# otherwise the root itself is the suite.
suite = root.find('./*') if localname(root.tag) == 'testsuites' else root
# All <testcase> descendants of the chosen suite (empty when no suite exists).
cases = [] if suite is None else list(suite.findall('.//testcase'))
|
||||
|
||||
def id_from_case(tc):
    """Return the NL/T test ID for a ``<testcase>`` element, or ``None``.

    The stripped "name" attribute is checked first: it must start with
    ``NL-<digits>`` or ``T-<one uppercase letter>`` followed by a word
    boundary. If it does not, the first such token found in the text of the
    element's ``<system-out>`` child is used instead.
    """
    name = (tc.get('name') or '').strip()
    m = re.match(r'(NL-\d+|T-[A-Z])\b', name)
    if m:
        return m.group(1)
    # Fall back to scanning the captured output for an ID token.
    so = tc.find('system-out')
    if so is not None and so.text:
        m = re.search(r'\b(NL-\d+|T-[A-Z])\b', so.text)
        if m:
            return m.group(1)
    return None
|
||||
|
||||
# Determine status per desired ID (first occurrence wins, matching the summary builder)
id_status = {}
for tc in cases:
    tid = id_from_case(tc)
    # Skip cases with no recognisable ID, IDs outside the required set,
    # and repeats of an ID that has already been recorded.
    if not tid or tid not in desired or tid in id_status:
        continue
    # A case passes only when it has neither a <failure> nor an <error> child.
    ok = (tc.find('failure') is None and tc.find('error') is None)
    id_status[tid] = ok

missing = [d for d in desired if d not in id_status]
failed = [d for d, ok in id_status.items() if ok is False]

if missing:
    print(f"::error::Missing NL/T tests in JUnit: {' '.join(missing)}")
if failed:
    print(f"::error::Failing NL/T tests in JUnit: {' '.join(sorted(failed))}")

# Gate: all desired must be present and passing
if missing or failed:
    sys.exit(1)

print("NL/T CI gate passed: all required tests present and passing.")
|
||||
PY
|
||||
|
||||
# ---------- Collect execution transcript (if present) ----------
|
||||
- name: Collect action execution transcript
|
||||
if: always()
|
||||
|
|
|
|||
Loading…
Reference in New Issue