feat: require dispatch evidence for explicit auto-chain next actions

2026-04-23 19:34:24 +08:00
parent 242f7ce463
commit 17dd26cde7
3 changed files with 78 additions and 11 deletions
--- a/scripts/test_force_recall_long_task_preflight.mjs
+++ b/scripts/test_force_recall_long_task_preflight.mjs
@@ -106,6 +106,8 @@ async function main() {
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
+    autoChainNextAction: 'dispatch_follow_up_subagent',
+    autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' },
    progressionClaim: 'already progressing to the next step in background',
    progressEvidence: { sessionKey: 'task-123' },
  });
@@ -115,20 +117,30 @@ async function main() {
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
+    autoChainNextAction: 'dispatch_follow_up_subagent',
    progressionClaim: 'already progressing to the next step in background',
    executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
  });
-  assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when progressionClaim lacks progressEvidence');
-  assert.match(JSON.stringify(failResult), /progressEvidence/, 'fail-path should require progressEvidence');
+  assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when explicit auto-chain action lacks dispatch evidence');
+  assert.match(JSON.stringify(failResult), /autoChainDispatchEvidence/, 'fail-path should require autoChainDispatchEvidence');

  const neutralResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
-    concreteNextAction: 'dispatch_follow_up_subagent',
-    executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
+    concreteNextAction: 'summarize findings for reply',
+    executionEvidence: { concreteNextAction: 'summarize findings for reply' },
  });
-  assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no progression claim');
-  assert.doesNotMatch(JSON.stringify(neutralResult), /progressEvidence/, 'neutral-path should not require progressEvidence');
+  assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no explicit auto-chain next action');
+  assert.doesNotMatch(JSON.stringify(neutralResult), /autoChainDispatchEvidence/, 'neutral-path should not require auto-chain dispatch evidence');
+
+  const directAutoChainFailResult = evaluateGate({
+    classification: 'long_task',
+    claimedExecution: true,
+    concreteNextAction: 'dispatch_follow_up_subagent',
+    autoChainNextAction: 'dispatch_follow_up_subagent',
+  });
+  assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
+  assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');

  const passInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
@@ -154,8 +166,9 @@ async function main() {
    requiredNextAction: 'dispatch_follow_up_subagent',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
-  assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper claims progression without progressEvidence');
-  assert.match(failInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fail-path should mention missing progress evidence');
+  assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper exposes explicit auto-chain action without dispatch evidence');
+  assert.match(failInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook fail-path should mention missing dispatched-action evidence');
+  assert.match(failInjected, /requiredEvidence=autoChainDispatchEvidence/, 'hook fail-path should require autoChainDispatchEvidence');

  const neutralInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
@@ -164,11 +177,11 @@ async function main() {
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: false,
-    requiredNextAction: 'summarize_findings_for_reply',
+    requiredNextAction: 'summarize findings for reply',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
-  assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not claim progression');
-  assert.doesNotMatch(neutralInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook neutral-path should not fail on missing progress evidence without a progression claim');
+  assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not expose an explicit auto-chain action');
+  assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');

  const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));