feat: sync auto-next obligation gate hardening

This commit is contained in:
2026-04-24 16:41:48 +08:00
parent 7c362dedf8
commit cb34935b28
9 changed files with 741 additions and 155 deletions

View File

@@ -11,6 +11,10 @@ function isObject(value) {
return value != null && typeof value === 'object' && !Array.isArray(value);
}
function normalizeAction(action) {
return JSON.stringify(action ?? null);
}
function hasValidDispatchReceipt(receipt) {
if (!isObject(receipt)) return false;
if (!isNonEmptyString(receipt.planId)) return false;
@@ -20,6 +24,27 @@ function hasValidDispatchReceipt(receipt) {
return true;
}
function receiptMatchesPayload(payload, receipt) {
if (!hasValidDispatchReceipt(receipt)) return false;
const expectedPlanId = payload?.planId;
if (isNonEmptyString(expectedPlanId) && receipt.planId !== expectedPlanId) return false;
const expectedCurrentTask = payload?.currentTask;
if (isNonEmptyString(expectedCurrentTask) && receipt.currentTask !== expectedCurrentTask) return false;
const expectedNextTask = payload?.nextTaskId ?? payload?.nextTaskKey ?? null;
const receiptNextTask = receipt?.nextTaskId ?? receipt?.nextTaskKey ?? null;
if (isNonEmptyString(expectedNextTask) && receiptNextTask !== expectedNextTask) return false;
const expectedNextAction = payload?.nextDerivedAction ?? payload?.derivedAction ?? null;
if (expectedNextAction != null && normalizeAction(receipt.nextDerivedAction) !== normalizeAction(expectedNextAction)) {
return false;
}
return true;
}
function parseArgs(argv) {
let inputPath = null;
let compact = false;
@@ -76,11 +101,39 @@ function evaluateContinuity(payload) {
const taskComplete = payload?.taskState === 'complete';
const nextAction = payload?.nextDerivedAction ?? payload?.derivedAction ?? null;
const nextActionKnown = nextAction != null;
const hasDispatchReceipt = hasValidDispatchReceipt(payload?.dispatchReceipt ?? null);
const explicitNextTaskKnown = payload?.nextTaskKnown === true;
const sameApprovedPlan = payload?.sameApprovedPlan === true;
const taskBoundaryStop = payload?.taskBoundaryStop === true;
const highRiskStop = payload?.highRiskStop === true;
const closureState = payload?.replyClosureState ?? null;
const isLegalTerminalState = LEGAL_TERMINAL_STATES.has(closureState);
const hasDispatchReceipt = receiptMatchesPayload(payload, payload?.dispatchReceipt ?? null);
const autoNextObligatory = taskComplete
&& explicitNextTaskKnown
&& sameApprovedPlan
&& taskBoundaryStop
&& !isLegalTerminalState
&& !highRiskStop;
if (taskComplete && nextActionKnown && !hasDispatchReceipt && !isLegalTerminalState) {
if (autoNextObligatory && !hasDispatchReceipt) {
return {
ok: false,
status: 'continuity_failure',
verdict: 'continuity_failure',
reason: 'missing_auto_next_dispatch',
};
}
if (taskComplete && nextActionKnown && !hasDispatchReceipt && !isLegalTerminalState && !highRiskStop && !('sameApprovedPlan' in (payload ?? {}))) {
return {
ok: false,
status: 'continuity_failure',
verdict: 'continuity_failure',
reason: 'missing_dispatch_receipt',
};
}
if (taskComplete && nextActionKnown && !hasDispatchReceipt && !isLegalTerminalState && !highRiskStop && sameApprovedPlan && !taskBoundaryStop && !explicitNextTaskKnown) {
return {
ok: false,
status: 'continuity_failure',
@@ -122,5 +175,4 @@ const response = {
},
};
process.stdout.write(`${JSON.stringify(response)}
`);
process.stdout.write(`${JSON.stringify(response)}\n`);

View File

@@ -81,6 +81,7 @@ function buildReceipt(payload) {
const receipt = {
planId: payload?.planId ?? null,
currentTask: payload?.currentTask ?? null,
nextTaskId: payload?.nextTaskId ?? null,
nextDerivedAction: nextAction,
dispatchedAt: payload?.dispatchedAt ?? null,
dispatchRunId: payload?.dispatchRunId ?? null,
@@ -97,6 +98,7 @@ function validateReceipt(receipt) {
for (const field of [
'planId',
'currentTask',
'nextTaskId',
'nextDerivedAction',
'dispatchedAt',
'dispatchRunId',

View File

@@ -168,6 +168,288 @@ const tests = [
}
},
},
{
name: 'auto-next obligation: fails when approved plan stops at completed-task boundary without auto-next dispatch',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-core',
currentTask: 'task-8',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: null,
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== false) throw new Error(`expected auto-next continuity failure ok=false, got ${JSON.stringify(result.json)}`);
if (result.json.verdict !== 'continuity_failure') throw new Error(`expected verdict=continuity_failure, got ${JSON.stringify(result.json.verdict)}`);
if (result.json.reason !== 'missing_auto_next_dispatch') throw new Error(`expected reason=missing_auto_next_dispatch, got ${JSON.stringify(result.json.reason)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: fails when only dry-run derived action exists at completed-task boundary',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-dry-run-only',
currentTask: 'task-8b',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9b',
derivedAction: {
type: 'message_subagent',
task: 'continue with task-9b',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: null,
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== false) throw new Error(`expected auto-next continuity failure ok=false, got ${JSON.stringify(result.json)}`);
if (result.json.verdict !== 'continuity_failure') throw new Error(`expected verdict=continuity_failure, got ${JSON.stringify(result.json.verdict)}`);
if (result.json.reason !== 'missing_auto_next_dispatch') throw new Error(`expected reason=missing_auto_next_dispatch, got ${JSON.stringify(result.json.reason)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: passes when explicit high-risk stop is active',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-high-risk-stop',
currentTask: 'task-8c',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9c',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9c',
},
replyClosureState: 'completed',
highRiskStop: true,
dispatchReceipt: null,
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when highRiskStop=true, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: passes when next task is not known',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-unknown-next-task',
currentTask: 'task-8d',
taskState: 'complete',
nextTaskKnown: false,
sameApprovedPlan: true,
taskBoundaryStop: true,
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: null,
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected pass when nextTaskKnown=false, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: passes when next action is not in the same approved plan',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-other-plan',
currentTask: 'task-8e',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: false,
taskBoundaryStop: true,
nextTaskId: 'task-other',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with unrelated task',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: null,
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected pass when sameApprovedPlan=false, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: fails when receipt exists but next-task linkage is stale or mismatched',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-linkage-mismatch',
currentTask: 'task-8f',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9f',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9f',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: {
planId: 'plan-auto-next-linkage-mismatch',
currentTask: 'task-8f',
nextTaskId: 'task-10f',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-10f',
},
dispatchedAt: '2026-04-24T16:00:00+08:00',
},
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== false) throw new Error(`expected linkage mismatch to fail, got ${JSON.stringify(result.json)}`);
if (result.json.reason !== 'missing_auto_next_dispatch') throw new Error(`expected linkage mismatch reason=missing_auto_next_dispatch, got ${JSON.stringify(result.json.reason)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: passes when receipt links to the required next task',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-linkage-match',
currentTask: 'task-8g',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9g',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9g',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: {
planId: 'plan-auto-next-linkage-match',
currentTask: 'task-8g',
nextTaskId: 'task-9g',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9g',
},
dispatchedAt: '2026-04-24T16:05:00+08:00',
},
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected linkage-matched receipt to pass, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'auto-next obligation: fails when receipt only proves checkpoint/session metadata without actual dispatch linkage',
run() {
const fixture = createFixture({
'input.json': {
planId: 'plan-auto-next-checkpoint-spoof',
currentTask: 'task-8h',
taskState: 'complete',
nextTaskKnown: true,
sameApprovedPlan: true,
taskBoundaryStop: true,
nextTaskId: 'task-9h',
nextDerivedAction: {
type: 'message_subagent',
task: 'continue with task-9h',
},
replyClosureState: 'completed',
highRiskStop: false,
dispatchReceipt: {
planId: 'plan-auto-next-checkpoint-spoof',
currentTask: 'task-8h',
nextTaskId: 'task-9h',
checkpointPath: 'checkpoints/task-8h.json',
sessionKey: 'task-8h',
dispatchedAt: '2026-04-24T16:10:00+08:00',
},
},
});
try {
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== false) throw new Error(`expected checkpoint-only receipt to fail, got ${JSON.stringify(result.json)}`);
if (result.json.reason !== 'missing_auto_next_dispatch') throw new Error(`expected checkpoint-only reason=missing_auto_next_dispatch, got ${JSON.stringify(result.json.reason)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: fails when dispatchReceipt is a fake non-null object without minimum receipt fields',
run() {
@@ -188,35 +470,17 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== false) {
throw new Error(`expected continuity failure ok=false for fake dispatch receipt, got ${JSON.stringify(result.json)}`);
}
if (result.json.verdict !== 'continuity_failure') {
throw new Error(`expected verdict=continuity_failure for fake dispatch receipt, got ${JSON.stringify(result.json.verdict)}`);
}
if (result.json.reason !== 'missing_dispatch_receipt') {
throw new Error(`expected reason=missing_dispatch_receipt for fake dispatch receipt, got ${JSON.stringify(result.json.reason)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== false) throw new Error(`expected continuity failure ok=false for fake dispatch receipt, got ${JSON.stringify(result.json)}`);
if (result.json.verdict !== 'continuity_failure') throw new Error(`expected verdict=continuity_failure for fake dispatch receipt, got ${JSON.stringify(result.json.verdict)}`);
if (result.json.reason !== 'missing_dispatch_receipt') throw new Error(`expected reason=missing_dispatch_receipt for fake dispatch receipt, got ${JSON.stringify(result.json.reason)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: passes when task is complete, next action is known, and a dispatch receipt already exists',
run() {
@@ -243,27 +507,15 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== true) {
throw new Error(`expected continuity pass ok=true when dispatch receipt exists, got ${JSON.stringify(result.json)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when dispatch receipt exists, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: passes when planner returns derivedAction and a bound dispatch receipt already exists',
run() {
@@ -290,27 +542,15 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== true) {
throw new Error(`expected continuity pass ok=true when derivedAction has bound dispatch receipt, got ${JSON.stringify(result.json)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when derivedAction has bound dispatch receipt, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is waiting_user',
run() {
@@ -329,27 +569,15 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== true) {
throw new Error(`expected continuity pass ok=true when closure is waiting_user, got ${JSON.stringify(result.json)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when closure is waiting_user, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is pending_verification',
run() {
@@ -368,27 +596,15 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== true) {
throw new Error(`expected continuity pass ok=true when closure is pending_verification, got ${JSON.stringify(result.json)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when closure is pending_verification, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}
},
},
{
name: 'continuity: passes when task is complete, next action is known, no dispatch receipt exists, and closure is blocked',
run() {
@@ -407,21 +623,10 @@ const tests = [
});
try {
const result = runGate({
args: ['--compact', '--input', fixture.path('input.json')],
});
if (result.status !== 0 && result.status !== null) {
throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
}
if (!result.json || typeof result.json !== 'object') {
throw new Error(`expected JSON output\nstdout=${result.stdout}`);
}
if (result.json.ok !== true) {
throw new Error(`expected continuity pass ok=true when closure is blocked, got ${JSON.stringify(result.json)}`);
}
const result = runGate({ args: ['--compact', '--input', fixture.path('input.json')] });
if (result.status !== 0 && result.status !== null) throw new Error(`expected controlled execution, got status=${result.status}\n${result.stderr || result.stdout}`);
if (!result.json || typeof result.json !== 'object') throw new Error(`expected JSON output\nstdout=${result.stdout}`);
if (result.json.ok !== true) throw new Error(`expected continuity pass ok=true when closure is blocked, got ${JSON.stringify(result.json)}`);
} finally {
fixture.cleanup();
}

View File

@@ -312,8 +312,10 @@ async function main() {
assert.match(passInjected, /\[APPROVED_PLAN_CONTINUITY_GATE\]/, 'hook pass-path should emit approved-plan continuity gate block');
assert.match(passInjected, /status=continuity_failure/, 'hook pass-path should fail continuity when planner only returns dry-run dispatch without a real receipt');
assert.match(passInjected, /verdict=continuity_failure/, 'hook pass-path should expose continuity failure verdict when no real dispatch receipt exists');
assert.match(passInjected, /reason=missing_dispatch_receipt/, 'hook pass-path should require a real dispatch receipt instead of treating dry-run dispatch as one');
assert.match(passInjected, /Route back to continuity failure until a real next dispatch receipt exists/, 'hook pass-path should hard-gate normal closeout until a real receipt exists');
assert.match(passInjected, /reason=missing_auto_next_dispatch/, 'hook pass-path should require auto-next dispatch proof instead of treating dry-run dispatch as enough');
assert.match(passInjected, /Do not stop at this completed-task boundary/, 'hook pass-path should explicitly forbid stopping at the completed-task boundary');
assert.match(passInjected, /Auto-dispatch the next task in the same approved plan, unless waiting_user, blocked, pending_verification, or high-risk stop applies/, 'hook pass-path should explain the auto-next obligation exceptions');
assert.match(passInjected, /Do not stop at this completed-task boundary/, 'hook pass-path should hard-gate the completed-task boundary');
assert.doesNotMatch(passInjected, /\[APPROVED_PLAN_CONTINUITY_GATE\][\s\S]*status=pass/, 'hook pass-path should not let approved-plan continuity pass on dry-run dispatch alone');
const failInjected = await withPatchedWrapper(buildWrapperScript({