unity-mcp/MCPForUnity/Editor/Services/TestJobManager.cs

607 lines
23 KiB
C#
Raw Normal View History

Async Test Infrastructure & Editor Readiness Status + new refresh_unity tool (#507) * Add editor readiness v2, refresh tool, and preflight guards * Detect external package changes and harden refresh retry * feat: add TestRunnerNoThrottle and async test running with background stall prevention - Add TestRunnerNoThrottle.cs: Sets editor to 'No Throttling' mode during test runs with SessionState persistence across domain reload - Add run_tests_async and get_test_job tools for non-blocking test execution - Add TestJobManager for async test job tracking with progress monitoring - Add ForceSynchronousImport to all AssetDatabase.Refresh() calls to prevent stalls - Mark DomainReloadResilienceTests as [Explicit] with documentation explaining the test infrastructure limitation (internal coroutine waits vs MCP socket polling) - MCP workflow is unaffected - socket messages provide external stimulus that keeps Unity responsive even when backgrounded * refactor: simplify and clean up code - Remove unused Newtonsoft.Json.Linq import from TestJobManager - Add throttling to SessionState persistence (once per second) to reduce overhead - Critical job state changes (start/finish) still persist immediately - Fix duplicate XML summary tag in DomainReloadResilienceTests * docs: add async test tools to README, document domain reload limitation - Add run_tests_async and get_test_job to main README tools list - Document background stall limitation for domain reload tests in DEV readme * ci: add separate job for domain reload tests Run [Explicit] domain_reload tests in their own job using -testCategory * ci: run domain reload tests in same job as regular tests Combines into single job with two test steps to reuse cached Library * fix: address coderabbit review issues - Fix TOCTOU race in TestJobManager.StartJob (single lock scope for check-and-set) - Store TestRunnerApi reference with HideAndDontSave to prevent GC/serialization issues * docs: update tool descriptions to prefer 
run_tests_async - run_tests_async is now marked as preferred for long-running suites - run_tests description notes it blocks and suggests async alternative * docs: update README screenshot to v8.6 UI * docs: add v8.6 UI screenshot * Update README for MCP version and instructions for v8.7 * fix: handle preflight busy signals and derive job status from test results - manage_asset, manage_gameobject, manage_scene now check preflight return value and propagate busy/retry signals to clients (fixes Sourcery #1) - TestJobManager.FinalizeCurrentJobFromRunFinished now sets job status to Failed when resultPayload.Failed > 0, not always Succeeded (fixes Sourcery #2) * fix: increase HTTP server startup timeout for dev mode When 'Force fresh server install' is enabled, uvx uses --no-cache --refresh which rebuilds the package and takes significantly longer to start. - Increase timeout from 10s to 45s when dev mode is enabled - Add informative log message explaining the longer startup time - Show actual timeout value in warning message * fix: derive job status from test results in FinalizeFromTask fallback Apply same logic as FinalizeCurrentJobFromRunFinished: check result.Failed > 0 to correctly mark jobs as Failed when tests fail, even in the fallback path when RunFinished callback is not delivered.
2026-01-04 04:42:32 +08:00
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using MCPForUnity.Editor.Helpers;
using Newtonsoft.Json;
using UnityEditor;
using UnityEditorInternal;
using UnityEditor.TestTools.TestRunner.Api;
namespace MCPForUnity.Editor.Services
{
/// <summary>
/// Lifecycle state of a test job tracked by TestJobManager.
/// </summary>
internal enum TestJobStatus
{
    /// <summary>The job has been started and has not been finalized yet. This is the default value.</summary>
    Running = 0,

    /// <summary>The job was finalized without any reported test failures.</summary>
    Succeeded = 1,

    /// <summary>The job was finalized with failed tests, or was abandoned with an error.</summary>
    Failed = 2
}
/// <summary>
/// One failed test recorded for a job: which test failed and the message reported for it.
/// </summary>
internal sealed class TestJobFailure
{
    /// <summary>Full name of the test that failed.</summary>
    public string FullName { get; set; }

    /// <summary>Failure message recorded for the test.</summary>
    public string Message { get; set; }
}
/// <summary>
/// Mutable in-memory record of a single test job: identity, status, timestamps, progress
/// counters, collected failures and the final result. All *UnixMs values in this file are
/// produced with DateTimeOffset.UtcNow.ToUnixTimeMilliseconds().
/// </summary>
internal sealed class TestJob
{
/// <summary>Job identifier; TestJobManager.StartJob generates it as a GUID in "N" format.</summary>
public string JobId { get; set; }
/// <summary>Current lifecycle state of the job.</summary>
public TestJobStatus Status { get; set; }
/// <summary>String form of the TestMode the job was started with.</summary>
public string Mode { get; set; }
/// <summary>Time the job was created.</summary>
public long StartedUnixMs { get; set; }
/// <summary>Time the job was finalized; null while it has not been finalized.</summary>
public long? FinishedUnixMs { get; set; }
/// <summary>Time of the most recent update to this record; used to order jobs when persisting and to detect stale jobs on restore.</summary>
public long LastUpdateUnixMs { get; set; }
/// <summary>Total test count supplied to OnRunStarted; null until that callback has been received.</summary>
public int? TotalTests { get; set; }
/// <summary>Number of leaf tests reported as finished so far.</summary>
public int CompletedTests { get; set; }
/// <summary>Full name of the test most recently reported as started; cleared when the run finishes.</summary>
public string CurrentTestFullName { get; set; }
/// <summary>Time the test in CurrentTestFullName was reported as started.</summary>
public long? CurrentTestStartedUnixMs { get; set; }
/// <summary>Full name of the leaf test most recently reported as finished.</summary>
public string LastFinishedTestFullName { get; set; }
/// <summary>Time the leaf test in LastFinishedTestFullName was reported as finished.</summary>
public long? LastFinishedUnixMs { get; set; }
/// <summary>Failures collected during the run; TestJobManager stops appending once its failure cap is reached.</summary>
public List<TestJobFailure> FailuresSoFar { get; set; }
/// <summary>Error text for a job that ended abnormally (e.g. orphaned after a domain reload); null otherwise.</summary>
public string Error { get; set; }
/// <summary>Final run result. Kept in memory only; it is not written to SessionState and is null after a restore.</summary>
public TestRunResult Result { get; set; }
}
/// <summary>
/// Tracks async test jobs started via MCP tools. This is not intended to capture manual Test Runner UI runs.
/// </summary>
internal static class TestJobManager
{
// Maximum number of failures recorded per job (and persisted per job).
// Keep this small to avoid ballooning payloads during polling.
private const int FailureCap = 25;
// NOTE(review): not referenced in the visible part of this file; presumably the threshold
// (60 s) after which a running test is reported as stuck further down — confirm.
private const long StuckThresholdMs = 60_000;
// Only the most recently updated jobs, up to this many, are written to SessionState.
private const int MaxJobsToKeep = 10;
private const long MinPersistIntervalMs = 1000; // Throttle persistence to reduce overhead
// SessionState survives domain reloads within the same Unity Editor session.
// Key holding the JSON snapshot of the job list plus the current job id.
private const string SessionKeyJobs = "MCPForUnity.TestJobsV1";
// Key holding only the current job id; read as a fallback when no JSON snapshot is stored.
private const string SessionKeyCurrentJobId = "MCPForUnity.CurrentTestJobIdV1";
// Guards Jobs and _currentJobId.
private static readonly object LockObj = new();
// All known jobs, keyed by job id.
private static readonly Dictionary<string, TestJob> Jobs = new();
// Id of the job currently considered running; null when no job is active.
private static string _currentJobId;
// Unix ms timestamp of the last successful write to SessionState (drives the throttle).
private static long _lastPersistUnixMs;
// Type initializer: reload job state saved in SessionState so that tracking survives a
// domain reload (for example, a recompile that happens while a job is running).
static TestJobManager() => TryRestoreFromSessionState();
/// <summary>
/// Id of the job currently registered as running, or null when there is none.
/// </summary>
public static string CurrentJobId
{
    get
    {
        lock (LockObj)
        {
            return _currentJobId;
        }
    }
}
/// <summary>
/// True while a current job id is set, i.e. a job is registered as running.
/// </summary>
public static bool HasRunningJob
{
    get
    {
        string activeId;
        lock (LockObj)
        {
            activeId = _currentJobId;
        }

        return !string.IsNullOrEmpty(activeId);
    }
}
/// <summary>
/// Root object of the JSON snapshot stored in SessionState. It is serialized with
/// JsonConvert without naming attributes, so the snake_case property names are the
/// stored JSON keys and must not be renamed.
/// </summary>
private sealed class PersistedState
{
// Id of the job that was current when the snapshot was taken; null when none was.
public string current_job_id { get; set; }
// Persisted job records (the most recently updated jobs only).
public List<PersistedJob> jobs { get; set; }
}
/// <summary>
/// Serialized form of a TestJob inside the SessionState snapshot. Each property mirrors the
/// TestJob property of the same meaning; the final TestRunResult is deliberately absent.
/// The snake_case property names are the stored JSON keys and must not be renamed.
/// </summary>
private sealed class PersistedJob
{
public string job_id { get; set; }
// Lower-cased TestJobStatus name ("running", "succeeded" or "failed").
public string status { get; set; }
public string mode { get; set; }
public long started_unix_ms { get; set; }
public long? finished_unix_ms { get; set; }
public long last_update_unix_ms { get; set; }
public int? total_tests { get; set; }
public int completed_tests { get; set; }
public string current_test_full_name { get; set; }
public long? current_test_started_unix_ms { get; set; }
public string last_finished_test_full_name { get; set; }
public long? last_finished_unix_ms { get; set; }
// Written with at most FailureCap entries.
public List<TestJobFailure> failures_so_far { get; set; }
public string error { get; set; }
}
/// <summary>
/// Converts a persisted status string back into a <see cref="TestJobStatus"/>.
/// Matching ignores surrounding whitespace and letter case; null, blank or unrecognized
/// text maps to <see cref="TestJobStatus.Running"/>.
/// </summary>
/// <param name="status">Status text as read from the persisted snapshot.</param>
/// <returns>The parsed status.</returns>
private static TestJobStatus ParseStatus(string status)
{
    string normalized = string.IsNullOrWhiteSpace(status)
        ? string.Empty
        : status.Trim().ToLowerInvariant();

    switch (normalized)
    {
        case "succeeded":
            return TestJobStatus.Succeeded;
        case "failed":
            return TestJobStatus.Failed;
        default:
            return TestJobStatus.Running;
    }
}
/// <summary>
/// Rebuilds the in-memory job table and the current job id from SessionState.
/// Called from the static constructor so job tracking survives a domain reload.
/// </summary>
/// <remarks>
/// Best-effort: any exception is logged as a warning and swallowed so that a corrupt or
/// incompatible snapshot can never block editor load. The final TestRunResult of a job is
/// not part of the snapshot, so restored jobs always have a null Result.
/// </remarks>
private static void TryRestoreFromSessionState()
{
    try
    {
        string json = SessionState.GetString(SessionKeyJobs, string.Empty);
        if (string.IsNullOrWhiteSpace(json))
        {
            // No JSON snapshot stored: fall back to the standalone current-job-id key.
            var legacy = SessionState.GetString(SessionKeyCurrentJobId, string.Empty);
            _currentJobId = string.IsNullOrWhiteSpace(legacy) ? null : legacy;
            return;
        }

        var state = JsonConvert.DeserializeObject<PersistedState>(json);
        if (state?.jobs == null)
        {
            return;
        }

        lock (LockObj)
        {
            Jobs.Clear();
            foreach (var pj in state.jobs)
            {
                // Skip entries that cannot be keyed.
                if (pj == null || string.IsNullOrWhiteSpace(pj.job_id))
                {
                    continue;
                }

                Jobs[pj.job_id] = new TestJob
                {
                    JobId = pj.job_id,
                    Status = ParseStatus(pj.status),
                    Mode = pj.mode,
                    StartedUnixMs = pj.started_unix_ms,
                    FinishedUnixMs = pj.finished_unix_ms,
                    LastUpdateUnixMs = pj.last_update_unix_ms,
                    TotalTests = pj.total_tests,
                    CompletedTests = pj.completed_tests,
                    CurrentTestFullName = pj.current_test_full_name,
                    CurrentTestStartedUnixMs = pj.current_test_started_unix_ms,
                    LastFinishedTestFullName = pj.last_finished_test_full_name,
                    LastFinishedUnixMs = pj.last_finished_unix_ms,
                    FailuresSoFar = pj.failures_so_far ?? new List<TestJobFailure>(),
                    Error = pj.error,
                    // Intentionally not persisted to avoid ballooning SessionState.
                    Result = null
                };
            }

            // A current job id is only meaningful if the job it names was restored too.
            _currentJobId = string.IsNullOrWhiteSpace(state.current_job_id) ? null : state.current_job_id;
            if (!string.IsNullOrEmpty(_currentJobId) && !Jobs.ContainsKey(_currentJobId))
            {
                _currentJobId = null;
            }

            // Detect and clean up stale "running" jobs that were orphaned by domain reload.
            // After a domain reload, TestRunStatus resets to not-running, but _currentJobId
            // may still be set. If the job hasn't been updated recently, it's likely orphaned.
            if (!string.IsNullOrEmpty(_currentJobId) && Jobs.TryGetValue(_currentJobId, out var currentJob))
            {
                if (currentJob.Status == TestJobStatus.Running)
                {
                    const long staleCutoffMs = 5 * 60 * 1000; // 5 minutes
                    long now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
                    if (now - currentJob.LastUpdateUnixMs > staleCutoffMs)
                    {
                        McpLog.Warn($"[TestJobManager] Clearing stale job {_currentJobId} (last update {(now - currentJob.LastUpdateUnixMs) / 1000}s ago)");
                        currentJob.Status = TestJobStatus.Failed;
                        currentJob.Error = "Job orphaned after domain reload";
                        currentJob.FinishedUnixMs = now;
                        // Release the slot so that a new job can be started.
                        _currentJobId = null;
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Restoration is best-effort; never block editor load.
        McpLog.Warn($"[TestJobManager] Failed to restore SessionState: {ex.Message}");
    }
}
/// <summary>
/// Writes a snapshot of the most recently updated jobs and the current job id to SessionState.
/// </summary>
/// <param name="force">
/// When false, the write is skipped if the previous successful write happened less than
/// MinPersistIntervalMs ago. When true, the snapshot is always written.
/// </param>
/// <remarks>Failures are logged as warnings and never propagate to the caller.</remarks>
private static void PersistToSessionState(bool force = false)
{
    long now = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    // Throttle non-critical updates to reduce overhead during large test runs.
    bool withinThrottleWindow = (now - _lastPersistUnixMs) < MinPersistIntervalMs;
    if (!force && withinThrottleWindow)
    {
        return;
    }

    try
    {
        PersistedState snapshot;
        lock (LockObj)
        {
            // Newest first, bounded to MaxJobsToKeep entries.
            IEnumerable<TestJob> newestJobs = Jobs.Values
                .OrderByDescending(candidate => candidate.LastUpdateUnixMs)
                .Take(MaxJobsToKeep);

            var persistedJobs = new List<PersistedJob>();
            foreach (TestJob source in newestJobs)
            {
                persistedJobs.Add(ToPersisted(source));
            }

            snapshot = new PersistedState
            {
                current_job_id = _currentJobId,
                jobs = persistedJobs
            };
        }

        // The SessionState writes happen outside the lock, on the captured snapshot.
        SessionState.SetString(SessionKeyCurrentJobId, snapshot.current_job_id ?? string.Empty);
        SessionState.SetString(SessionKeyJobs, JsonConvert.SerializeObject(snapshot));
        _lastPersistUnixMs = now;
    }
    catch (Exception ex)
    {
        McpLog.Warn($"[TestJobManager] Failed to persist SessionState: {ex.Message}");
    }

    // Copies one in-memory job into its persisted shape (failures truncated to FailureCap).
    PersistedJob ToPersisted(TestJob j)
    {
        List<TestJobFailure> failures = j.FailuresSoFar ?? new List<TestJobFailure>();
        return new PersistedJob
        {
            job_id = j.JobId,
            status = j.Status.ToString().ToLowerInvariant(),
            mode = j.Mode,
            started_unix_ms = j.StartedUnixMs,
            finished_unix_ms = j.FinishedUnixMs,
            last_update_unix_ms = j.LastUpdateUnixMs,
            total_tests = j.TotalTests,
            completed_tests = j.CompletedTests,
            current_test_full_name = j.CurrentTestFullName,
            current_test_started_unix_ms = j.CurrentTestStartedUnixMs,
            last_finished_test_full_name = j.LastFinishedTestFullName,
            last_finished_unix_ms = j.LastFinishedUnixMs,
            failures_so_far = failures.Take(FailureCap).ToList(),
            error = j.Error
        };
    }
}
/// <summary>
/// Registers a new job as the current running job, persists it, and starts the test run.
/// </summary>
/// <param name="mode">Test mode to run; its string form is stored on the job.</param>
/// <param name="filterOptions">Optional filter passed through to the test runner service.</param>
/// <returns>The id of the new job (a GUID in "N" format).</returns>
/// <exception cref="InvalidOperationException">Another job is already registered as running.</exception>
/// <remarks>
/// If the test runner service throws while starting the run, the job is marked as failed and
/// the running slot is released before the exception is rethrown, so a failed start cannot
/// block later calls with "already in progress".
/// </remarks>
public static string StartJob(TestMode mode, TestFilterOptions filterOptions = null)
{
    string jobId = Guid.NewGuid().ToString("N");
    long started = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    string modeStr = mode.ToString();

    var job = new TestJob
    {
        JobId = jobId,
        Status = TestJobStatus.Running,
        Mode = modeStr,
        StartedUnixMs = started,
        FinishedUnixMs = null,
        LastUpdateUnixMs = started,
        TotalTests = null,
        CompletedTests = 0,
        CurrentTestFullName = null,
        CurrentTestStartedUnixMs = null,
        LastFinishedTestFullName = null,
        LastFinishedUnixMs = null,
        FailuresSoFar = new List<TestJobFailure>(),
        Error = null,
        Result = null
    };

    // Single lock scope for check-and-set to avoid TOCTOU race
    lock (LockObj)
    {
        if (!string.IsNullOrEmpty(_currentJobId))
        {
            throw new InvalidOperationException("A Unity test run is already in progress.");
        }

        Jobs[jobId] = job;
        _currentJobId = jobId;
    }

    PersistToSessionState(force: true);

    // Kick the run (must be called on main thread; our command handlers already run there).
    Task<TestRunResult> task;
    try
    {
        task = MCPServiceLocator.Tests.RunTestsAsync(mode, filterOptions);
    }
    catch (Exception ex)
    {
        // The run never started: record the failure and release the running slot so the
        // manager does not stay stuck on a job that will never receive any callback.
        long failedAt = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
        lock (LockObj)
        {
            job.Status = TestJobStatus.Failed;
            job.Error = ex.Message;
            job.FinishedUnixMs = failedAt;
            job.LastUpdateUnixMs = failedAt;
            if (string.Equals(_currentJobId, jobId, StringComparison.Ordinal))
            {
                _currentJobId = null;
            }
        }

        PersistToSessionState(force: true);
        throw;
    }

    void FinalizeJob(Action finalize)
    {
        // Ensure state mutation happens on main thread to avoid Unity API surprises.
        EditorApplication.delayCall += () =>
        {
            try { finalize(); }
            catch (Exception ex) { McpLog.Error($"[TestJobManager] Finalize failed: {ex.Message}\n{ex.StackTrace}"); }
        };
    }

    task.ContinueWith(t =>
    {
        // NOTE: We now finalize jobs deterministically from the TestRunnerService RunFinished callback.
        // This continuation is retained as a safety net in case RunFinished is not delivered.
        FinalizeJob(() => FinalizeFromTask(jobId, t));
    }, TaskScheduler.Default);

    return jobId;
}
/// <summary>
/// Finalizes the current job from the run-finished result: stamps the finish time, derives
/// the status from the result's failed count, stores the result and releases the running slot.
/// Does nothing when there is no current job.
/// </summary>
/// <param name="resultPayload">
/// Result of the finished run. The job becomes Failed when it reports more than zero failed
/// tests; otherwise (including a null payload) it becomes Succeeded.
/// </param>
public static void FinalizeCurrentJobFromRunFinished(TestRunResult resultPayload)
{
    long finishedAt = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    lock (LockObj)
    {
        if (string.IsNullOrEmpty(_currentJobId))
        {
            return;
        }

        if (!Jobs.TryGetValue(_currentJobId, out TestJob finished))
        {
            return;
        }

        finished.LastUpdateUnixMs = finishedAt;
        finished.FinishedUnixMs = finishedAt;

        bool anyTestFailed = resultPayload != null && resultPayload.Failed > 0;
        if (anyTestFailed)
        {
            finished.Status = TestJobStatus.Failed;
        }
        else
        {
            finished.Status = TestJobStatus.Succeeded;
        }

        finished.Error = null;
        finished.Result = resultPayload;
        finished.CurrentTestFullName = null;

        // The slot is free again; a new job may be started.
        _currentJobId = null;
    }

    PersistToSessionState(force: true);
}
/// <summary>
/// Resets the progress fields of the current job at the start of a run and records the
/// total test count. Does nothing when there is no current job.
/// </summary>
/// <param name="totalTests">Number of tests in the run, or null when unknown.</param>
public static void OnRunStarted(int? totalTests)
{
    long timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    lock (LockObj)
    {
        if (string.IsNullOrEmpty(_currentJobId))
        {
            return;
        }

        if (!Jobs.TryGetValue(_currentJobId, out TestJob active))
        {
            return;
        }

        active.LastUpdateUnixMs = timestamp;
        active.TotalTests = totalTests;
        active.CompletedTests = 0;
        active.CurrentTestFullName = null;
        active.CurrentTestStartedUnixMs = null;
        active.LastFinishedTestFullName = null;
        active.LastFinishedUnixMs = null;

        // Start the run with an empty failure list, reusing the existing list when present.
        if (active.FailuresSoFar == null)
        {
            active.FailuresSoFar = new List<TestJobFailure>();
        }
        else
        {
            active.FailuresSoFar.Clear();
        }
    }

    PersistToSessionState(force: true);
}
/// <summary>
/// Records that a test has started on the current job. Ignored when the name is null or
/// blank, or when there is no current job. The SessionState write is throttled.
/// </summary>
/// <param name="testFullName">Full name of the test that started.</param>
public static void OnTestStarted(string testFullName)
{
    bool hasName = !string.IsNullOrWhiteSpace(testFullName);
    if (!hasName)
    {
        return;
    }

    long timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    lock (LockObj)
    {
        if (string.IsNullOrEmpty(_currentJobId))
        {
            return;
        }

        if (!Jobs.TryGetValue(_currentJobId, out TestJob active))
        {
            return;
        }

        active.LastUpdateUnixMs = timestamp;
        active.CurrentTestFullName = testFullName;
        active.CurrentTestStartedUnixMs = timestamp;
    }

    PersistToSessionState();
}
/// <summary>
/// Records a finished leaf test on the current job: bumps the completed counter, remembers
/// the test as the last finished one and, for failures, appends an entry to the failure list
/// until FailureCap entries have been collected. Does nothing when there is no current job.
/// The SessionState write is throttled.
/// </summary>
/// <param name="testFullName">Full name of the test that finished.</param>
/// <param name="isFailure">True when the test failed.</param>
/// <param name="message">Failure message; "Test failed" is stored when it is null or blank.</param>
public static void OnLeafTestFinished(string testFullName, bool isFailure, string message)
{
    long timestamp = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();

    lock (LockObj)
    {
        if (string.IsNullOrEmpty(_currentJobId))
        {
            return;
        }

        if (!Jobs.TryGetValue(_currentJobId, out TestJob active))
        {
            return;
        }

        active.LastUpdateUnixMs = timestamp;
        active.CompletedTests = Math.Max(0, active.CompletedTests + 1);
        active.LastFinishedTestFullName = testFullName;
        active.LastFinishedUnixMs = timestamp;

        if (isFailure)
        {
            if (active.FailuresSoFar == null)
            {
                active.FailuresSoFar = new List<TestJobFailure>();
            }

            List<TestJobFailure> failures = active.FailuresSoFar;
            if (failures.Count < FailureCap)
            {
                string recordedMessage = string.IsNullOrWhiteSpace(message) ? "Test failed" : message;
                failures.Add(new TestJobFailure
                {
                    FullName = testFullName,
                    Message = recordedMessage
                });
            }
        }
    }

    PersistToSessionState();
}
/// <summary>
/// Marks the current job as having no test in flight and refreshes its last-update timestamp.
/// The job's status is not changed here. Does nothing when there is no current job.
/// </summary>
public static void OnRunFinished()
{
    long timestampMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
    bool updated = false;
    lock (LockObj)
    {
        if (!string.IsNullOrEmpty(_currentJobId) && Jobs.TryGetValue(_currentJobId, out var active))
        {
            active.LastUpdateUnixMs = timestampMs;
            active.CurrentTestFullName = null;
            updated = true;
        }
    }

    // Persist outside the lock, with force requested, only when a job was touched.
    if (updated)
    {
        PersistToSessionState(force: true);
    }
}
v9 pre-release pruning (#528) * refactor: Split ParseColorOrDefault into two overloads and change default to Color.white * Auto-format Python code * Remove unused Python module * Refactored VFX functionality into multiple files Tested everything, works like a charm * Rename ManageVfx folder to just Vfx We know what it's managing * Clean up whitespace on plugin tools and resources * Make ManageGameObject less of a monolith by splitting it out into different files * Remove obsolete FindObjectByInstruction method We also update the namespace for ManageVFX * refactor: Consolidate editor state resources into single canonical implementation Merged EditorStateV2 into EditorState, making get_editor_state the canonical resource. Updated Unity C# to use EditorStateCache directly. Enhanced Python implementation with advice/staleness enrichment, external changes detection, and instance ID inference. Removed duplicate EditorStateV2 resource and legacy fallback mapping. * Validate editor state with Pydantic models in both C# and Python Added strongly-typed Pydantic models for EditorStateV2 schema in Python and corresponding C# classes with JsonProperty attributes. Updated C# to serialize using typed classes instead of anonymous objects. Python now validates the editor state payload before returning it, catching schema mismatches early. * Consolidate run_tests and run_tests_async into single async implementation Merged run_tests_async into run_tests, making async job-based execution the default behavior. Removed synchronous blocking test execution. Updated RunTests.cs to start test jobs immediately and return job_id for polling. Changed TestJobManager methods to internal visibility. Updated README to reflect single run_tests_async tool. Python implementation now uses async job pattern exclusively. 
* Validate test job responses with Pydantic models in Python * Change resources URI from unity:// to mcpforunity:// It should reduce conflicts with other Unity MCPs that users try, and to comply with Unity's requests regarding use of their company and product name * Update README with all tools + better listing for resources * Update other references to resources * Updated translated doc - unfortunately I cannot verify * Update the Chinese translation of the dev docks * Change menu item from Setup Window to Local Setup Window We now differentiate whether it's HTTP local or remote * Fix URIs for menu items and tests * Shouldn't have removed it * Minor edits from CodeRabbit feedback * Don't use reflection which takes longer * Fix failing python tests * Add serialization helpers for ParticleSystem curves and MinMaxCurve types Added SerializeAnimationCurve and SerializeMinMaxCurve helper methods to properly serialize Unity's curve types. Updated GetInfo to use these helpers for startLifetime, startSpeed, startSize, gravityModifier, and rateOverTime instead of only reading constant values. * Use ctx param * Update Server/src/services/tools/run_tests.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * Minor fixes * Rename anything EditorStateV2 to just EditorState It's the default, there's no old version * Make infer_single_instance_id public by removing underscore prefix * Fix Python tests, again * Replace AI generated .meta files with actual Unity ones * ## Pre-Launch Enhancements: Testing Infrastructure & Tool Improvements (#8) * Add local test harness for fast developer iteration Scripts for running the NL/T/GO test suites locally against a GUI Unity Editor, complementing the CI workflows in .github/workflows/. 
Benefits: - 10-100x faster than CI (no Docker startup) - Real-time Unity console debugging - Single test execution for rapid iteration - Auto-detects HTTP vs stdio transport Usage: ./scripts/local-test/setup.sh # One-time setup ./scripts/local-test/quick-test.sh NL-0 # Run single test ./scripts/local-test/run-nl-suite-local.sh # Full suite See scripts/local-test/README.md for details. Also updated .gitignore to: - Allow scripts/local-test/ to be tracked - Ignore generated artifacts (reports/*.xml, .claude/local/, .unity-mcp/) * Fix issue #525: Save dirty scenes for all test modes Move SaveDirtyScenesIfNeeded() call outside the PlayMode conditional so EditMode tests don't get blocked by Unity's "Save Scene" modal dialog. This prevents MCP from timing out when running EditMode tests with unsaved scene changes. * fix: add missing FAST_FAIL_TIMEOUT constant in PluginHub The FAST_FAIL_TIMEOUT class attribute was referenced on line 149 but never defined, causing AttributeError on every ping attempt. This error was silently caught by the broad 'except Exception' handler, causing all fast-fail commands (read_console, get_editor_state, ping) to fail after 6 seconds of retries with 'ping not answered' error. Added FAST_FAIL_TIMEOUT = 10 to define a 10-second timeout for fast-fail commands, matching the intent of the existing fast-fail infrastructure. 
* feat(ScriptableObject): enhance dry-run validation for AnimationCurve and Quaternion Dry-run validation now validates value formats, not just property existence: - AnimationCurve: Validates structure ({keys:[...]} or direct array), checks each keyframe is an object, validates numeric fields (time, value, inSlope, outSlope, inWeight, outWeight) and integer fields (weightedMode) - Quaternion: Validates array length (3 for Euler, 4 for raw) or object structure ({x,y,z,w} or {euler:[x,y,z]}), ensures all components are numeric Refactored shared validation helpers into appropriate locations: - ParamCoercion: IsNumericToken, ValidateNumericField, ValidateIntegerField - VectorParsing: ValidateAnimationCurveFormat, ValidateQuaternionFormat Added comprehensive XML documentation clarifying keyframe field defaults (all default to 0 except as noted). Added 5 new dry-run validation tests covering valid and invalid formats for both AnimationCurve and Quaternion properties. * test: fix integration tests after merge - test_refresh_unity_retry_recovery: Mock now handles both refresh_unity and get_editor_state commands (refresh_unity internally calls get_editor_state when wait_for_ready=True) - test_run_tests_async_forwards_params: Mock response now includes required 'mode' field for RunTestsStartResponse Pydantic validation - test_get_test_job_forwards_job_id: Updated to handle GetTestJobResponse as Pydantic model instead of dict (use model_dump() for assertions) * Update warning message to apply to all test modes Follow-up to PR #527: Since SaveDirtyScenesIfNeeded() now runs for all test modes, update the warning message to say 'tests' instead of 'PlayMode tests'. * feat(run_tests): add wait_timeout to get_test_job to avoid client loop detection When polling for test completion, MCP clients like Cursor can detect the repeated get_test_job calls as 'looping' and terminate the agent. 
Added wait_timeout parameter that makes the server wait internally for tests to complete (polling Unity every 2s) before returning. This dramatically reduces client-side tool calls from 10-20 down to 1-2, avoiding loop detection. Usage: get_test_job(job_id='xxx', wait_timeout=30) - Returns immediately if tests complete within timeout - Returns current status if timeout expires (client can call again) - Recommended: 30-60 seconds * fix: use Pydantic attribute access in test_run_tests_async for merge compatibility * revert: remove local test harness - will be submitted in separate PR --------- Co-authored-by: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: dsarno <david@lighthaus.us> Co-authored-by: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
2026-01-08 06:51:51 +08:00
/// <summary>
/// Looks up a job by id in the job table.
/// </summary>
/// <param name="jobId">Id of the job to find.</param>
/// <returns>
/// The stored job, or null when <paramref name="jobId"/> is null/empty/whitespace
/// or no job with that id exists.
/// </returns>
internal static TestJob GetJob(string jobId)
{
    bool hasId = !string.IsNullOrWhiteSpace(jobId);
    if (!hasId)
    {
        return null;
    }

    lock (LockObj)
    {
        if (Jobs.TryGetValue(jobId, out var found))
        {
            return found;
        }
        return null;
    }
}
v9 pre-release pruning (#528) * refactor: Split ParseColorOrDefault into two overloads and change default to Color.white * Auto-format Python code * Remove unused Python module * Refactored VFX functionality into multiple files Tested everything, works like a charm * Rename ManageVfx folder to just Vfx We know what it's managing * Clean up whitespace on plugin tools and resources * Make ManageGameObject less of a monolith by splitting it out into different files * Remove obsolete FindObjectByInstruction method We also update the namespace for ManageVFX * refactor: Consolidate editor state resources into single canonical implementation Merged EditorStateV2 into EditorState, making get_editor_state the canonical resource. Updated Unity C# to use EditorStateCache directly. Enhanced Python implementation with advice/staleness enrichment, external changes detection, and instance ID inference. Removed duplicate EditorStateV2 resource and legacy fallback mapping. * Validate editor state with Pydantic models in both C# and Python Added strongly-typed Pydantic models for EditorStateV2 schema in Python and corresponding C# classes with JsonProperty attributes. Updated C# to serialize using typed classes instead of anonymous objects. Python now validates the editor state payload before returning it, catching schema mismatches early. * Consolidate run_tests and run_tests_async into single async implementation Merged run_tests_async into run_tests, making async job-based execution the default behavior. Removed synchronous blocking test execution. Updated RunTests.cs to start test jobs immediately and return job_id for polling. Changed TestJobManager methods to internal visibility. Updated README to reflect single run_tests_async tool. Python implementation now uses async job pattern exclusively. 
* Validate test job responses with Pydantic models in Python * Change resources URI from unity:// to mcpforunity:// It should reduce conflicts with other Unity MCPs that users try, and to comply with Unity's requests regarding use of their company and product name * Update README with all tools + better listing for resources * Update other references to resources * Updated translated doc - unfortunately I cannot verify * Update the Chinese translation of the dev docks * Change menu item from Setup Window to Local Setup Window We now differentiate whether it's HTTP local or remote * Fix URIs for menu items and tests * Shouldn't have removed it * Minor edits from CodeRabbit feedback * Don't use reflection which takes longer * Fix failing python tests * Add serialization helpers for ParticleSystem curves and MinMaxCurve types Added SerializeAnimationCurve and SerializeMinMaxCurve helper methods to properly serialize Unity's curve types. Updated GetInfo to use these helpers for startLifetime, startSpeed, startSize, gravityModifier, and rateOverTime instead of only reading constant values. * Use ctx param * Update Server/src/services/tools/run_tests.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * Minor fixes * Rename anything EditorStateV2 to just EditorState It's the default, there's no old version * Make infer_single_instance_id public by removing underscore prefix * Fix Python tests, again * Replace AI generated .meta files with actual Unity ones * ## Pre-Launch Enhancements: Testing Infrastructure & Tool Improvements (#8) * Add local test harness for fast developer iteration Scripts for running the NL/T/GO test suites locally against a GUI Unity Editor, complementing the CI workflows in .github/workflows/. 
Benefits: - 10-100x faster than CI (no Docker startup) - Real-time Unity console debugging - Single test execution for rapid iteration - Auto-detects HTTP vs stdio transport Usage: ./scripts/local-test/setup.sh # One-time setup ./scripts/local-test/quick-test.sh NL-0 # Run single test ./scripts/local-test/run-nl-suite-local.sh # Full suite See scripts/local-test/README.md for details. Also updated .gitignore to: - Allow scripts/local-test/ to be tracked - Ignore generated artifacts (reports/*.xml, .claude/local/, .unity-mcp/) * Fix issue #525: Save dirty scenes for all test modes Move SaveDirtyScenesIfNeeded() call outside the PlayMode conditional so EditMode tests don't get blocked by Unity's "Save Scene" modal dialog. This prevents MCP from timing out when running EditMode tests with unsaved scene changes. * fix: add missing FAST_FAIL_TIMEOUT constant in PluginHub The FAST_FAIL_TIMEOUT class attribute was referenced on line 149 but never defined, causing AttributeError on every ping attempt. This error was silently caught by the broad 'except Exception' handler, causing all fast-fail commands (read_console, get_editor_state, ping) to fail after 6 seconds of retries with 'ping not answered' error. Added FAST_FAIL_TIMEOUT = 10 to define a 10-second timeout for fast-fail commands, matching the intent of the existing fast-fail infrastructure. 
* feat(ScriptableObject): enhance dry-run validation for AnimationCurve and Quaternion Dry-run validation now validates value formats, not just property existence: - AnimationCurve: Validates structure ({keys:[...]} or direct array), checks each keyframe is an object, validates numeric fields (time, value, inSlope, outSlope, inWeight, outWeight) and integer fields (weightedMode) - Quaternion: Validates array length (3 for Euler, 4 for raw) or object structure ({x,y,z,w} or {euler:[x,y,z]}), ensures all components are numeric Refactored shared validation helpers into appropriate locations: - ParamCoercion: IsNumericToken, ValidateNumericField, ValidateIntegerField - VectorParsing: ValidateAnimationCurveFormat, ValidateQuaternionFormat Added comprehensive XML documentation clarifying keyframe field defaults (all default to 0 except as noted). Added 5 new dry-run validation tests covering valid and invalid formats for both AnimationCurve and Quaternion properties. * test: fix integration tests after merge - test_refresh_unity_retry_recovery: Mock now handles both refresh_unity and get_editor_state commands (refresh_unity internally calls get_editor_state when wait_for_ready=True) - test_run_tests_async_forwards_params: Mock response now includes required 'mode' field for RunTestsStartResponse Pydantic validation - test_get_test_job_forwards_job_id: Updated to handle GetTestJobResponse as Pydantic model instead of dict (use model_dump() for assertions) * Update warning message to apply to all test modes Follow-up to PR #527: Since SaveDirtyScenesIfNeeded() now runs for all test modes, update the warning message to say 'tests' instead of 'PlayMode tests'. * feat(run_tests): add wait_timeout to get_test_job to avoid client loop detection When polling for test completion, MCP clients like Cursor can detect the repeated get_test_job calls as 'looping' and terminate the agent. 
Added wait_timeout parameter that makes the server wait internally for tests to complete (polling Unity every 2s) before returning. This dramatically reduces client-side tool calls from 10-20 down to 1-2, avoiding loop detection. Usage: get_test_job(job_id='xxx', wait_timeout=30) - Returns immediately if tests complete within timeout - Returns current status if timeout expires (client can call again) - Recommended: 30-60 seconds * fix: use Pydantic attribute access in test_run_tests_async for merge compatibility * revert: remove local test harness - will be submitted in separate PR --------- Co-authored-by: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com> --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: dsarno <david@lighthaus.us> Co-authored-by: Scott Jennings <scott.jennings+CIGINT@cloudimperiumgames.com>
2026-01-08 06:51:51 +08:00
/// <summary>
/// Builds the anonymous-object payload describing a job: identity, status, timestamps,
/// a nested progress object, the error text and the serialized result.
/// </summary>
/// <param name="job">Job to describe; null yields null.</param>
/// <param name="includeDetails">Forwarded unchanged to the result's own <c>ToSerializable</c>.</param>
/// <param name="includeFailedTests">Forwarded unchanged to the result's own <c>ToSerializable</c>.</param>
/// <returns>
/// An anonymous object with snake_case members, or null when <paramref name="job"/> is null.
/// <c>result</c> is null while the job is still running or when no result was stored.
/// </returns>
internal static object ToSerializable(TestJob job, bool includeDetails, bool includeFailedTests)
{
    if (job == null)
    {
        return null;
    }

    // A run with failing tests is finalized as Failed while still keeping its Result.
    // Gating on Succeeded alone would drop the result for exactly the runs where
    // includeFailedTests matters, so serialize the result for any finished job that has one.
    object resultPayload = null;
    bool finished = job.Status != TestJobStatus.Running;
    if (finished && job.Result != null)
    {
        resultPayload = job.Result.ToSerializable(job.Mode, includeDetails, includeFailedTests);
    }

    return new
    {
        job_id = job.JobId,
        status = job.Status.ToString().ToLowerInvariant(),
        mode = job.Mode,
        started_unix_ms = job.StartedUnixMs,
        finished_unix_ms = job.FinishedUnixMs,
        last_update_unix_ms = job.LastUpdateUnixMs,
        progress = new
        {
            completed = job.CompletedTests,
            total = job.TotalTests,
            current_test_full_name = job.CurrentTestFullName,
            current_test_started_unix_ms = job.CurrentTestStartedUnixMs,
            last_finished_test_full_name = job.LastFinishedTestFullName,
            last_finished_unix_ms = job.LastFinishedUnixMs,
            stuck_suspected = IsStuck(job),
            editor_is_focused = InternalEditorUtility.isApplicationActive,
            blocked_reason = GetBlockedReason(job),
            failures_so_far = BuildFailuresPayload(job.FailuresSoFar),
            // True once the stored failure list has reached the cap, i.e. later failures may be missing.
            failures_capped = (job.FailuresSoFar != null && job.FailuresSoFar.Count >= FailureCap)
        },
        error = job.Error,
        result = resultPayload
    };
}
/// <summary>
/// Gives a best-effort reason why a running job appears stuck.
/// </summary>
/// <param name="job">Job to inspect; may be null.</param>
/// <returns>
/// Null when the job is null, not running, or not considered stuck by <c>IsStuck</c>;
/// otherwise one of "editor_unfocused", "compiling", "asset_import" or "unknown".
/// </returns>
private static string GetBlockedReason(TestJob job)
{
    bool running = job != null && job.Status == TestJobStatus.Running;
    if (!running || !IsStuck(job))
    {
        return null;
    }

    // Checked first: a backgrounded Unity editor can get heavily throttled by the OS/Editor.
    if (!InternalEditorUtility.isApplicationActive)
    {
        return "editor_unfocused";
    }

    return EditorApplication.isCompiling
        ? "compiling"
        : EditorApplication.isUpdating
            ? "asset_import"
            : "unknown";
}
/// <summary>
/// Decides whether a job looks stuck: it is running, has a current test with a recorded
/// start time, and that test started more than <c>StuckThresholdMs</c> milliseconds ago.
/// </summary>
/// <param name="job">Job to inspect; may be null.</param>
/// <returns>True when the conditions above hold; false otherwise.</returns>
private static bool IsStuck(TestJob job)
{
    bool running = job != null && job.Status == TestJobStatus.Running;
    if (!running)
    {
        return false;
    }

    bool hasTestInFlight = !string.IsNullOrWhiteSpace(job.CurrentTestFullName)
        && job.CurrentTestStartedUnixMs.HasValue;
    if (!hasTestInFlight)
    {
        return false;
    }

    long elapsedMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds() - job.CurrentTestStartedUnixMs.Value;
    return elapsedMs > StuckThresholdMs;
}
/// <summary>
/// Projects recorded failures into an array of anonymous objects with
/// <c>full_name</c> and <c>message</c> members, preserving order.
/// </summary>
/// <param name="failures">Recorded failures; may be null or empty.</param>
/// <returns>
/// One entry per failure, or an empty array when <paramref name="failures"/> is null or empty.
/// A null list element produces an entry whose members are both null.
/// </returns>
private static object[] BuildFailuresPayload(List<TestJobFailure> failures)
{
    int count = failures == null ? 0 : failures.Count;
    if (count == 0)
    {
        return Array.Empty<object>();
    }

    var payload = new object[count];
    int index = 0;
    while (index < failures.Count)
    {
        var failure = failures[index];
        payload[index] = new { full_name = failure?.FullName, message = failure?.Message };
        index++;
    }
    return payload;
}
/// <summary>
/// Finalizes a job from the outcome of its task, unless the job is unknown or has already
/// left the Running state. In every case the current-job id is cleared when it equals
/// <paramref name="jobId"/>; state is persisted only when this call finalized the job.
/// </summary>
/// <param name="jobId">Id of the job the task belongs to.</param>
/// <param name="task">Task whose outcome (faulted, canceled or result) determines the final state.</param>
private static void FinalizeFromTask(string jobId, Task<TestRunResult> task)
{
    bool finalizedHere;
    lock (LockObj)
    {
        bool known = Jobs.TryGetValue(jobId, out var job);

        // A job that is no longer Running was already finalized elsewhere; leave it untouched.
        finalizedHere = known && job.Status == TestJobStatus.Running;
        if (finalizedHere)
        {
            long finishedAtMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
            job.LastUpdateUnixMs = finishedAtMs;
            job.FinishedUnixMs = finishedAtMs;

            TestJobStatus outcome;
            string errorText;
            TestRunResult runResult;
            if (task.IsFaulted)
            {
                outcome = TestJobStatus.Failed;
                errorText = task.Exception?.GetBaseException()?.Message ?? "Unknown test job failure";
                runResult = null;
            }
            else if (task.IsCanceled)
            {
                outcome = TestJobStatus.Failed;
                errorText = "Test job canceled";
                runResult = null;
            }
            else
            {
                // Any failed test marks the whole job as Failed; the result is kept either way.
                runResult = task.Result;
                bool anyFailed = runResult != null && runResult.Failed > 0;
                outcome = anyFailed ? TestJobStatus.Failed : TestJobStatus.Succeeded;
                errorText = null;
            }

            job.Status = outcome;
            job.Error = errorText;
            job.Result = runResult;
        }

        if (_currentJobId == jobId)
        {
            _currentJobId = null;
        }
    }

    if (finalizedHere)
    {
        PersistToSessionState(force: true);
    }
}
}
}