#!/usr/bin/env node
|
|
import assert from 'node:assert/strict';
|
|
import { spawnSync } from 'node:child_process';
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
const __dirname = path.dirname(__filename);
|
|
const plannerScript = path.join(__dirname, 'plan_long_task_auto_chain.mjs');
|
|
|
|
/**
 * Table-driven fixtures for the planner smoke test.
 *
 * Each entry has:
 *   - name:     label copied into the per-scenario result entry.
 *   - input:    object that is JSON-serialized and piped to the planner's stdin.
 *   - expected: values the planner output is checked against.
 *       plannerStatus, derivedAction, dispatchMode and autoChainAllowed are
 *       compared for equality; reasonIncludes must occur inside `reason`;
 *       requiredEvidenceIncludes must be an element of `requiredEvidence`.
 */
const scenarios = [
  // --- Dispatch cases: gate passes and the supporting evidence is present. ---
  {
    name: 'implementer result with review-required next action -> review dispatch',
    input: {
      gateStatus: 'pass',
      actorStage: 'implementer_result',
      requiredNextAction: 'request_spec_review',
      executionEvidence: {
        modifiedFiles: ['scripts/example.mjs'],
        verificationResult: 'tests pass',
      },
    },
    expected: {
      plannerStatus: 'pass',
      derivedAction: 'dispatch_spec_review',
      dispatchMode: 'dry_run_dispatch',
      autoChainAllowed: true,
      reasonIncludes: 'implementation evidence present',
      requiredEvidenceIncludes: 'executionEvidence',
    },
  },
  {
    name: 'spec review PASS -> code quality review dispatch',
    input: {
      gateStatus: 'pass',
      actorStage: 'spec_review',
      reviewOutcome: 'pass',
      requiredNextAction: 'request_code_quality_review',
      reviewEvidence: {
        reviewer: 'spec-reviewer',
        verdict: 'pass',
      },
    },
    expected: {
      plannerStatus: 'pass',
      derivedAction: 'dispatch_code_quality_review',
      dispatchMode: 'dry_run_dispatch',
      autoChainAllowed: true,
      reasonIncludes: 'review pass evidence present',
      requiredEvidenceIncludes: 'reviewEvidence',
    },
  },
  {
    name: 'explicit blocker -> retry/fix action',
    input: {
      gateStatus: 'pass',
      actorStage: 'review_result',
      blocker: 'tests failed in review',
      requiredNextAction: 'fix_review_findings',
      blockerEvidence: {
        reviewer: 'qa-reviewer',
        finding: 'tests failed',
      },
    },
    expected: {
      plannerStatus: 'pass',
      derivedAction: 'dispatch_fix_slice',
      dispatchMode: 'dry_run_dispatch',
      autoChainAllowed: true,
      reasonIncludes: 'blocker evidence present',
      requiredEvidenceIncludes: 'blockerEvidence',
    },
  },

  // --- No-dispatch cases: nothing to do, or the gate refuses. ---
  {
    name: 'no concrete next action -> none',
    input: {
      gateStatus: 'pass',
      actorStage: 'implementer_result',
      executionEvidence: {
        modifiedFiles: ['scripts/example.mjs'],
      },
    },
    expected: {
      plannerStatus: 'none',
      derivedAction: 'none',
      dispatchMode: 'no_dispatch',
      autoChainAllowed: false,
      reasonIncludes: 'no concrete next action',
      requiredEvidenceIncludes: 'concreteNextAction',
    },
  },
  {
    name: 'gate fail refuses auto-chain',
    input: {
      gateStatus: 'fail',
      actorStage: 'implementer_result',
      requiredNextAction: 'request_spec_review',
      executionEvidence: {
        modifiedFiles: ['scripts/example.mjs'],
      },
    },
    expected: {
      plannerStatus: 'blocked_by_gate',
      derivedAction: 'none',
      dispatchMode: 'no_dispatch',
      autoChainAllowed: false,
      reasonIncludes: 'gateStatus must pass',
      requiredEvidenceIncludes: 'gateStatus=pass',
    },
  },

  // --- Missing-evidence cases: a next action is requested without its evidence object. ---
  {
    name: 'textual review request without implementation evidence -> blocked_by_evidence',
    input: {
      gateStatus: 'pass',
      actorStage: 'implementer_result',
      requiredNextAction: 'request_spec_review',
    },
    expected: {
      plannerStatus: 'blocked_by_evidence',
      derivedAction: 'none',
      dispatchMode: 'no_dispatch',
      autoChainAllowed: false,
      reasonIncludes: 'implementation evidence missing',
      requiredEvidenceIncludes: 'executionEvidence',
    },
  },
  {
    name: 'spec review pass without review evidence -> blocked_by_evidence',
    input: {
      gateStatus: 'pass',
      actorStage: 'spec_review',
      reviewOutcome: 'pass',
      requiredNextAction: 'request_code_quality_review',
    },
    expected: {
      plannerStatus: 'blocked_by_evidence',
      derivedAction: 'none',
      dispatchMode: 'no_dispatch',
      autoChainAllowed: false,
      reasonIncludes: 'review pass evidence missing',
      requiredEvidenceIncludes: 'reviewEvidence',
    },
  },
  {
    name: 'fix slice without blocker evidence -> blocked_by_evidence',
    input: {
      gateStatus: 'pass',
      actorStage: 'review_result',
      blocker: 'hook_preflight_blocker',
      requiredNextAction: 'fix_review_findings',
    },
    expected: {
      plannerStatus: 'blocked_by_evidence',
      derivedAction: 'none',
      dispatchMode: 'no_dispatch',
      autoChainAllowed: false,
      reasonIncludes: 'blocker evidence missing',
      requiredEvidenceIncludes: 'blockerEvidence',
    },
  },
];
|
|
|
|
/**
 * Runs the planner script in a child Node.js process and returns its parsed output.
 *
 * The input is serialized with JSON.stringify and supplied on the child's
 * stdin; the child is started with the `--compact` argument and its stdout is
 * parsed as a single JSON document.
 *
 * @param {unknown} input - JSON-serializable planner input.
 * @returns {unknown} The value parsed from the planner's stdout.
 * @throws {Error} If the child could not be run, exited with a non-zero
 *   status, was terminated by a signal, or printed something that is not JSON.
 */
function runPlanner(input) {
  const result = spawnSync(process.execPath, [plannerScript, '--compact'], {
    input: JSON.stringify(input),
    encoding: 'utf8',
  });

  // spawnSync does not throw when the child fails to start, times out, or
  // overflows maxBuffer; it sets `error` and leaves `status` null instead.
  if (result.error) {
    throw new Error(`planner script could not be run: ${result.error.message}`, {
      cause: result.error,
    });
  }

  if (result.status !== 0) {
    // A signal-terminated child has status === null, so name the signal too.
    const signalNote = result.signal ? ` signal=${result.signal}` : '';
    throw new Error(
      `planner script failed with status=${result.status}${signalNote}: ${result.stderr || result.stdout}`,
    );
  }

  try {
    return JSON.parse(result.stdout);
  } catch (error) {
    throw new Error(
      `planner script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`,
      { cause: error },
    );
  }
}
|
|
|
|
/**
 * Asserts that a parsed planner output carries every core field with the
 * expected JavaScript type.
 *
 * Checked fields: plannerStatus, derivedAction, dispatchMode, reason (strings),
 * requiredEvidence (array) and autoChainAllowed (boolean).
 *
 * @param {unknown} output - Value parsed from the planner's stdout.
 * @throws {assert.AssertionError} If `output` is not an object or any field
 *   has the wrong type. The message names the field and the type found.
 */
function requireCoreFields(output) {
  // JSON.parse can legitimately yield null or a primitive; fail with an
  // assertion instead of a TypeError on the first property access.
  assert.ok(
    output !== null && typeof output === 'object',
    `planner output should be an object (got ${output === null ? 'null' : typeof output})`,
  );

  for (const field of ['plannerStatus', 'derivedAction', 'dispatchMode', 'reason']) {
    assert.equal(
      typeof output[field],
      'string',
      `${field} should be string (got ${typeof output[field]})`,
    );
  }

  assert.ok(
    Array.isArray(output.requiredEvidence),
    `requiredEvidence should be an array (got ${typeof output.requiredEvidence})`,
  );

  assert.equal(
    typeof output.autoChainAllowed,
    'boolean',
    `autoChainAllowed should be boolean (got ${typeof output.autoChainAllowed})`,
  );
}
|
|
|
|
/**
 * Compares a planner output against one scenario's expectations.
 *
 * plannerStatus, derivedAction, dispatchMode and autoChainAllowed must be
 * equal to the expected values; `expected.reasonIncludes` must be a literal
 * substring of `output.reason`; `expected.requiredEvidenceIncludes` must be an
 * element of `output.requiredEvidence`.
 *
 * @param {object} output - Planner output; `reason` must be a string and
 *   `requiredEvidence` an array.
 * @param {object} expected - The scenario's `expected` record.
 * @throws {assert.AssertionError} On the first mismatch. The message carries
 *   both the expected and the actual value, because a custom assert message
 *   replaces the default diff.
 */
function assertScenario(output, expected) {
  for (const field of ['plannerStatus', 'derivedAction', 'dispatchMode', 'autoChainAllowed']) {
    assert.equal(
      output[field],
      expected[field],
      `${field} mismatch: expected ${JSON.stringify(expected[field])}, got ${JSON.stringify(output[field])}`,
    );
  }

  // Plain substring test: no regular expression, so no escaping is needed.
  assert.ok(
    output.reason.includes(expected.reasonIncludes),
    `expected reason to include: ${expected.reasonIncludes} (got: ${output.reason})`,
  );

  assert.ok(
    output.requiredEvidence.includes(expected.requiredEvidenceIncludes),
    `expected requiredEvidence to include: ${expected.requiredEvidenceIncludes} (got: ${JSON.stringify(output.requiredEvidence)})`,
  );
}
|
|
|
|
// Run every scenario and collect one result entry per scenario. A failure in
// one scenario (planner crash, malformed output, or expectation mismatch) is
// recorded and does not stop the remaining scenarios from running.
const results = [];

for (const scenario of scenarios) {
  try {
    const output = runPlanner(scenario.input);
    requireCoreFields(output);
    assertScenario(output, scenario.expected);

    const {
      plannerStatus,
      derivedAction,
      dispatchMode,
      autoChainAllowed,
      reason,
      requiredEvidence,
    } = output;

    results.push({
      scenario: scenario.name,
      ok: true,
      plannerStatus,
      derivedAction,
      dispatchMode,
      autoChainAllowed,
      reason,
      requiredEvidence,
    });
  } catch (error) {
    results.push({
      scenario: scenario.name,
      ok: false,
      error: error instanceof Error ? error.message : String(error),
    });
  }
}

const failedCount = results.filter((entry) => !entry.ok).length;

const summary = {
  total: results.length,
  passed: results.length - failedCount,
  failed: failedCount,
};

// Emit the machine-readable report: the summary counts plus per-scenario details.
process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`);

// Set the exit code instead of calling process.exit(): exiting immediately can
// end the process before a pending stdout write has been flushed.
if (failedCount > 0) process.exitCode = 1;
|