Files
approved-plan-continuity-ha…/scripts/test_approved_plan_continuity_gate.mjs

663 lines
26 KiB
JavaScript

#!/usr/bin/env node
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { spawnSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
// ES modules have no built-in __filename/__dirname, so both are rebuilt from
// import.meta.url. The gate script under test is resolved as a sibling of this file.
const __filename = fileURLToPath(new URL(import.meta.url));
const { dir: __dirname } = path.parse(__filename);
const gateScript = path.resolve(__dirname, 'approved_plan_continuity_gate.mjs');
/**
 * Creates a fresh temporary directory (prefix `approved-plan-continuity-` under
 * the OS temp dir) and populates it with the given files.
 *
 * @param {Object<string, *>} [files={}] Map of relative path -> content. String
 *   content is written verbatim; anything else is written as 2-space-indented
 *   JSON followed by a newline. Parent directories are created as needed.
 * @returns {{root: string, path: function(...string): string, cleanup: function(): void}}
 *   `root` is the directory, `path(...segments)` joins segments onto it, and
 *   `cleanup()` removes it recursively (with `force`, so a missing directory
 *   is not an error).
 */
function createFixture(files = {}) {
  const root = mkdtempSync(path.join(os.tmpdir(), 'approved-plan-continuity-'));

  const serialize = (content) => {
    if (typeof content === 'string') {
      return content;
    }
    return `${JSON.stringify(content, null, 2)}\n`;
  };

  Object.entries(files).forEach(([relativePath, content]) => {
    const target = path.join(root, relativePath);
    mkdirSync(path.dirname(target), { recursive: true });
    writeFileSync(target, serialize(content));
  });

  const resolveInside = (...segments) => path.join(root, ...segments);
  const removeRoot = () => {
    rmSync(root, { recursive: true, force: true });
  };

  return { root, path: resolveInside, cleanup: removeRoot };
}
/**
 * Runs the gate script synchronously in a child Node process (same executable
 * as the current process) and captures its outcome.
 *
 * @param {object} [options]
 * @param {string[]} [options.args=[]] CLI arguments appended after the script path.
 * @param {string|null} [options.stdin=null] Text passed as the child's stdin input.
 * @returns {{status: (number|null), signal: (string|null), error: (Error|null),
 *   stdout: string, stderr: string, json: *}}
 *   `status` is the exit code (null when the child did not exit normally).
 *   `signal` is the signal that terminated the child, if any. `error` is the
 *   spawn-level failure reported by `spawnSync`, if any. `json` is the whole of
 *   stdout parsed as JSON, or null when stdout is empty/whitespace or not valid
 *   JSON.
 */
function runGate({ args = [], stdin = null } = {}) {
  const result = spawnSync(process.execPath, [gateScript, ...args], {
    input: stdin,
    encoding: 'utf8',
  });

  let json = null;
  if (result.stdout && result.stdout.trim()) {
    try {
      json = JSON.parse(result.stdout);
    } catch {
      // Non-JSON stdout is an expected outcome for callers; they see json === null
      // and still have the raw stdout for diagnostics.
      json = null;
    }
  }

  return {
    status: result.status,
    // Surface why status may be null (signal kill or spawn failure) instead of
    // dropping that information; previously both were indistinguishable from
    // a gate that simply printed nothing.
    signal: result.signal ?? null,
    error: result.error ?? null,
    stdout: result.stdout,
    stderr: result.stderr,
    json,
  };
}
/**
 * Builds a `message_subagent` action object for the given task description.
 *
 * @param {string} task Value for the action's `task` field.
 * @returns {{type: string, task: string}}
 */
function messageSubagent(task) {
  return { type: 'message_subagent', task };
}

/**
 * Shared scaffolding for every case: writes `input` as `input.json` in a fresh
 * fixture, runs the gate with `--compact --input <path>`, and applies the two
 * checks common to all cases (exit status is 0 or null; stdout parsed to a
 * JSON object). The fixture is removed whether or not a check throws.
 *
 * @param {object} input Payload serialized into input.json.
 * @returns {object} The `runGate` result; its `json` property is a non-null object.
 * @throws {Error} When the exit status is neither 0 nor null, or stdout did not
 *   yield a JSON object.
 */
function runGateOnInput(input) {
  const fixture = createFixture({ 'input.json': input });
  try {
    const result = runGate({
      args: ['--compact', '--input', fixture.path('input.json')],
    });
    if (result.status !== 0 && result.status !== null) {
      throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
    }
    if (!result.json || typeof result.json !== 'object') {
      throw new Error(`expected JSON output\nstdout=${result.stdout}`);
    }
    return result;
  } finally {
    fixture.cleanup();
  }
}

// Gate scenarios. Each entry has a `name` and a synchronous `run()` that throws
// an Error describing the first expectation the gate's JSON reply violates.
const tests = [
  {
    name: 'skeleton: gate script responds with placeholder envelope when given fixture input',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-skeleton',
        currentTask: 'task-5',
      });
      if (json.gate !== 'approved_plan_continuity') {
        throw new Error(`expected gate=approved_plan_continuity, got ${JSON.stringify(json.gate)}`);
      }
    },
  },
  {
    name: 'continuity: fails when task is complete, next action is known, no dispatch receipt exists, and closure is not in an allowed terminal state',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-missing-dispatch',
        currentTask: 'task-6',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-7'),
        replyClosureState: 'completed',
        dispatchReceipt: null,
      });
      if (json.ok !== false) {
        throw new Error(`expected continuity failure ok=false, got ${JSON.stringify(json)}`);
      }
      if (json.verdict !== 'continuity_failure') {
        throw new Error(`expected verdict=continuity_failure, got ${JSON.stringify(json.verdict)}`);
      }
    },
  },
  {
    name: 'continuity: fails when planner returns derivedAction without any bound dispatch receipt',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-derived-action-without-bound-dispatch',
        currentTask: 'task-6b',
        taskState: 'complete',
        derivedAction: messageSubagent('continue with task-7b'),
        replyClosureState: 'completed',
        dispatchReceipt: null,
      });
      if (json.ok !== false) {
        throw new Error(`expected continuity failure ok=false for derivedAction without dispatch receipt, got ${JSON.stringify(json)}`);
      }
      if (json.verdict !== 'continuity_failure') {
        throw new Error(`expected verdict=continuity_failure for derivedAction without dispatch receipt, got ${JSON.stringify(json.verdict)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: fails when approved plan stops at completed-task boundary without auto-next dispatch',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-core',
        currentTask: 'task-8',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9',
        nextDerivedAction: messageSubagent('continue with task-9'),
        replyClosureState: 'completed',
        highRiskStop: false,
        dispatchReceipt: null,
      });
      if (json.ok !== false) {
        throw new Error(`expected auto-next continuity failure ok=false, got ${JSON.stringify(json)}`);
      }
      if (json.verdict !== 'continuity_failure') {
        throw new Error(`expected verdict=continuity_failure, got ${JSON.stringify(json.verdict)}`);
      }
      if (json.reason !== 'missing_auto_next_dispatch') {
        throw new Error(`expected reason=missing_auto_next_dispatch, got ${JSON.stringify(json.reason)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: fails when only dry-run derived action exists at completed-task boundary',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-dry-run-only',
        currentTask: 'task-8b',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9b',
        derivedAction: messageSubagent('continue with task-9b'),
        replyClosureState: 'completed',
        highRiskStop: false,
        dispatchReceipt: null,
      });
      if (json.ok !== false) {
        throw new Error(`expected auto-next continuity failure ok=false, got ${JSON.stringify(json)}`);
      }
      if (json.verdict !== 'continuity_failure') {
        throw new Error(`expected verdict=continuity_failure, got ${JSON.stringify(json.verdict)}`);
      }
      if (json.reason !== 'missing_auto_next_dispatch') {
        throw new Error(`expected reason=missing_auto_next_dispatch, got ${JSON.stringify(json.reason)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: passes when explicit high-risk stop is active',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-high-risk-stop',
        currentTask: 'task-8c',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9c',
        nextDerivedAction: messageSubagent('continue with task-9c'),
        replyClosureState: 'completed',
        highRiskStop: true,
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when highRiskStop=true, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: passes when next task is not known',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-unknown-next-task',
        currentTask: 'task-8d',
        taskState: 'complete',
        nextTaskKnown: false,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        replyClosureState: 'completed',
        highRiskStop: false,
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected pass when nextTaskKnown=false, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: passes when next action is not in the same approved plan',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-other-plan',
        currentTask: 'task-8e',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: false,
        taskBoundaryStop: true,
        nextTaskId: 'task-other',
        nextDerivedAction: messageSubagent('continue with unrelated task'),
        replyClosureState: 'completed',
        highRiskStop: false,
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected pass when sameApprovedPlan=false, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: fails when receipt exists but next-task linkage is stale or mismatched',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-linkage-mismatch',
        currentTask: 'task-8f',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9f',
        nextDerivedAction: messageSubagent('continue with task-9f'),
        replyClosureState: 'completed',
        highRiskStop: false,
        // Receipt points at task-10f while the input requires task-9f.
        dispatchReceipt: {
          planId: 'plan-auto-next-linkage-mismatch',
          currentTask: 'task-8f',
          nextTaskId: 'task-10f',
          nextDerivedAction: messageSubagent('continue with task-10f'),
          dispatchedAt: '2026-04-24T16:00:00+08:00',
        },
      });
      if (json.ok !== false) {
        throw new Error(`expected linkage mismatch to fail, got ${JSON.stringify(json)}`);
      }
      if (json.reason !== 'missing_auto_next_dispatch') {
        throw new Error(`expected linkage mismatch reason=missing_auto_next_dispatch, got ${JSON.stringify(json.reason)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: passes when receipt links to the required next task',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-linkage-match',
        currentTask: 'task-8g',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9g',
        nextDerivedAction: messageSubagent('continue with task-9g'),
        replyClosureState: 'completed',
        highRiskStop: false,
        dispatchReceipt: {
          planId: 'plan-auto-next-linkage-match',
          currentTask: 'task-8g',
          nextTaskId: 'task-9g',
          nextDerivedAction: messageSubagent('continue with task-9g'),
          dispatchedAt: '2026-04-24T16:05:00+08:00',
        },
      });
      if (json.ok !== true) {
        throw new Error(`expected linkage-matched receipt to pass, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'auto-next obligation: fails when receipt only proves checkpoint/session metadata without actual dispatch linkage',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-auto-next-checkpoint-spoof',
        currentTask: 'task-8h',
        taskState: 'complete',
        nextTaskKnown: true,
        sameApprovedPlan: true,
        taskBoundaryStop: true,
        nextTaskId: 'task-9h',
        nextDerivedAction: messageSubagent('continue with task-9h'),
        replyClosureState: 'completed',
        highRiskStop: false,
        // Receipt carries checkpoint/session fields but no nextDerivedAction.
        dispatchReceipt: {
          planId: 'plan-auto-next-checkpoint-spoof',
          currentTask: 'task-8h',
          nextTaskId: 'task-9h',
          checkpointPath: 'checkpoints/task-8h.json',
          sessionKey: 'task-8h',
          dispatchedAt: '2026-04-24T16:10:00+08:00',
        },
      });
      if (json.ok !== false) {
        throw new Error(`expected checkpoint-only receipt to fail, got ${JSON.stringify(json)}`);
      }
      if (json.reason !== 'missing_auto_next_dispatch') {
        throw new Error(`expected checkpoint-only reason=missing_auto_next_dispatch, got ${JSON.stringify(json.reason)}`);
      }
    },
  },
  {
    name: 'continuity: fails when dispatchReceipt is a fake non-null object without minimum receipt fields',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-fake-dispatch-receipt',
        currentTask: 'task-6fake',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-7fake'),
        replyClosureState: 'completed',
        dispatchReceipt: {
          fake: true,
        },
      });
      if (json.ok !== false) {
        throw new Error(`expected continuity failure ok=false for fake dispatch receipt, got ${JSON.stringify(json)}`);
      }
      if (json.verdict !== 'continuity_failure') {
        throw new Error(`expected verdict=continuity_failure for fake dispatch receipt, got ${JSON.stringify(json.verdict)}`);
      }
      if (json.reason !== 'missing_dispatch_receipt') {
        throw new Error(`expected reason=missing_dispatch_receipt for fake dispatch receipt, got ${JSON.stringify(json.reason)}`);
      }
    },
  },
  {
    name: 'continuity: passes when task is complete, next action is known, and a dispatch receipt already exists',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-existing-dispatch',
        currentTask: 'task-6',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-7'),
        replyClosureState: 'completed',
        dispatchReceipt: {
          planId: 'plan-existing-dispatch',
          currentTask: 'task-6',
          nextDerivedAction: messageSubagent('continue with task-7'),
          dispatchedAt: '2026-04-24T11:55:00+08:00',
        },
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when dispatch receipt exists, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'continuity: passes when planner returns derivedAction and a bound dispatch receipt already exists',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-derived-action-with-bound-dispatch',
        currentTask: 'task-6c',
        taskState: 'complete',
        derivedAction: messageSubagent('continue with task-7c'),
        replyClosureState: 'completed',
        dispatchReceipt: {
          planId: 'plan-derived-action-with-bound-dispatch',
          currentTask: 'task-6c',
          nextDerivedAction: messageSubagent('continue with task-7c'),
          dispatchedAt: '2026-04-24T12:05:00+08:00',
        },
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when derivedAction has bound dispatch receipt, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is waiting_user',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-waiting-user-closure',
        currentTask: 'task-8',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-9'),
        replyClosureState: 'waiting_user',
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when closure is waiting_user, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is pending_verification',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-pending-verification-closure',
        currentTask: 'task-8b',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-9'),
        replyClosureState: 'pending_verification',
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when closure is pending_verification, got ${JSON.stringify(json)}`);
      }
    },
  },
  {
    name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is blocked',
    run() {
      const { json } = runGateOnInput({
        planId: 'plan-blocked-closure',
        currentTask: 'task-9',
        taskState: 'complete',
        nextDerivedAction: messageSubagent('continue with task-10'),
        replyClosureState: 'blocked',
        dispatchReceipt: null,
      });
      if (json.ok !== true) {
        throw new Error(`expected continuity pass ok=true when closure is blocked, got ${JSON.stringify(json)}`);
      }
    },
  },
];
// Run every case in declaration order, recording one result entry per case.
// A throwing case is captured (not propagated) so the remaining cases still run.
const results = [];
let failed = false;
for (const test of tests) {
  try {
    test.run();
    results.push({ test: test.name, ok: true });
  } catch (error) {
    failed = true;
    results.push({
      test: test.name,
      ok: false,
      error: error instanceof Error ? error.message : String(error),
    });
  }
}

// Emit a single pretty-printed JSON report: pass/fail counts plus per-case results.
const summary = {
  total: tests.length,
  passed: results.filter((entry) => entry.ok).length,
  failed: results.filter((entry) => !entry.ok).length,
};
process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`);

// Signal failure through exitCode instead of process.exit(): exit() terminates
// immediately and can cut off the report above when stdout is written
// asynchronously, whereas exitCode lets the process drain and exit on its own.
if (failed) process.exitCode = 1;