From 17dd26cde75ba4e76a96c75968cd4f33146021ed Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 23 Apr 2026 19:34:24 +0800 Subject: [PATCH] feat: require auto-chain action evidence --- hooks/force-recall/handler.ts | 11 +++++ scripts/long_task_gate_lock.mjs | 43 +++++++++++++++++++ .../test_force_recall_long_task_preflight.mjs | 35 ++++++++++----- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index 3b22beb..5c7b1dd 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -146,11 +146,18 @@ function buildGateLockInput(wrapperResult: any): Record { "bind_externalized_checkpoint_path_or_abort_silent_launch", ].includes(requiredNextAction), ); + const autoChainNextAction = hasConcreteExecutionEvidence ? requiredNextAction : ""; const executionEvidence = hasConcreteExecutionEvidence ? { concreteNextAction: requiredNextAction, } : null; + const autoChainDispatchEvidence = progressEvidence && hasConcreteExecutionEvidence + ? { + sessionKey: typeof progressEvidence.sessionKey === "string" ? progressEvidence.sessionKey : "", + concreteNextAction: requiredNextAction, + } + : null; const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence) ? "already progressing to the next step in background" : ""; @@ -170,6 +177,8 @@ function buildGateLockInput(wrapperResult: any): Record { nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "", requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "", concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "", + autoChainNextAction, + autoChainDispatchEvidence, progressionClaim: claimedProgression, claimedProgression: claimedProgression, statusSummary: claimedProgression, @@ -274,6 +283,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string { "- ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.", "- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.", "- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.", + "- ENFORCEMENT: If hook input carries autoChainNextAction, it must also carry matching autoChainDispatchEvidence before the gate may pass that auto-chain step.", ]; if (gateLockResult.gateStatus === "fail") { @@ -282,6 +292,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string { lines.push("- HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate."); lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records."); lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up."); + lines.push("- HARD_GATE: If autoChainNextAction is explicit, you must actually dispatch it and surface autoChainDispatchEvidence; otherwise the gate fails."); lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff."); } diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs index 86700f1..1de0bc9 100644 --- a/scripts/long_task_gate_lock.mjs +++ b/scripts/long_task_gate_lock.mjs @@ -30,6 +30,14 @@ const EVIDENCE_FIELDS = Object.freeze({ 'verificationEvidence', 'checkpointArtifactEvidence', ]), + autoChainNextAction: Object.freeze([ + 'autoChainNextAction', + 'auto_chain_next_action', + ]), + autoChainDispatchEvidence: Object.freeze([ + 'autoChainDispatchEvidence', + 'auto_chain_dispatch_evidence', + ]), progressEvidence: Object.freeze([ 'progressEvidence', 'progressEvidence.sessionKey', @@ -64,6 +72,11 @@ const GATE_REQUIREMENTS = Object.freeze({ acceptedFields: EVIDENCE_FIELDS.executionEvidence, requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence', }), + autoChainDispatchEvidence: Object.freeze({ + evidenceKey: 'autoChainDispatchEvidence', + acceptedFields: EVIDENCE_FIELDS.autoChainDispatchEvidence, + requiredValue: 'dispatched-action evidence for the explicit auto-chain next action', + }), progressEvidence: Object.freeze({ evidenceKey: 'progressEvidence', acceptedFields: EVIDENCE_FIELDS.progressEvidence, @@ -180,6 +193,29 @@ function hasExecutionEvidence(input) { }); } +function hasExplicitAutoChainNextAction(input) { + return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.autoChainNextAction); +} + +function hasAutoChainDispatchEvidence(input) { + return EVIDENCE_FIELDS.autoChainDispatchEvidence.some((fieldPath) => { + const value = getPathValue(input, fieldPath); + if (hasNonEmptyString(value)) return true; + if (Array.isArray(value)) return value.length > 0; + if (value && typeof value === 'object') return Object.keys(value).length > 0; + return false; + }); +} + +function requiresAutoChainDispatchEvidence(input) { + if (!hasExplicitAutoChainNextAction(input)) return false; + const nextAction = EVIDENCE_FIELDS.autoChainNextAction + .map((fieldPath) => getPathValue(input, fieldPath)) + .find((value) => hasNonEmptyString(value)); + if (!hasNonEmptyString(nextAction)) return false; + return /^([a-z]+_)+[a-z]+$/i.test(nextAction.trim()); +} + function hasProgressEvidence(input) { return EVIDENCE_FIELDS.progressEvidence.some((fieldPath) => { const value = getPathValue(input, fieldPath); @@ -249,6 +285,13 @@ function evaluateGate(input) { allowedResponseModes.push('evidence_preserving_follow_up'); } + if (requiresAutoChainDispatchEvidence(input) && !hasAutoChainDispatchEvidence(input)) { + failed = true; + reasons.push('explicit auto-chain next action requires dispatched-action evidence'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.autoChainDispatchEvidence)); + allowedResponseModes.push('dispatch_required'); + } + if (!failed) { reasons.push('required long-task gate evidence is present or no gated condition was triggered'); allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply'); diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index f9abc8f..d0b1603 100755 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -106,6 +106,8 @@ async function main() { classification: 'long_task', claimedExecution: true, concreteNextAction: 'dispatch_follow_up_subagent', + autoChainNextAction: 'dispatch_follow_up_subagent', + autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' }, progressionClaim: 'already progressing to the next step in background', progressEvidence: { sessionKey: 'task-123' }, }); @@ -115,20 +117,30 @@ async function main() { classification: 'long_task', claimedExecution: true, concreteNextAction: 'dispatch_follow_up_subagent', + autoChainNextAction: 'dispatch_follow_up_subagent', progressionClaim: 'already progressing to the next step in background', executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' }, }); - assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when progressionClaim lacks progressEvidence'); - assert.match(JSON.stringify(failResult), /progressEvidence/, 'fail-path should require progressEvidence'); + assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when explicit auto-chain action lacks dispatch evidence'); + assert.match(JSON.stringify(failResult), /autoChainDispatchEvidence/, 'fail-path should require autoChainDispatchEvidence'); const neutralResult = evaluateGate({ classification: 'long_task', claimedExecution: true, - concreteNextAction: 'dispatch_follow_up_subagent', - executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' }, + concreteNextAction: 'summarize findings for reply', + executionEvidence: { concreteNextAction: 'summarize findings for reply' }, }); - assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no progression claim'); - assert.doesNotMatch(JSON.stringify(neutralResult), /progressEvidence/, 'neutral-path should not require progressEvidence'); + assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no explicit auto-chain next action'); + assert.doesNotMatch(JSON.stringify(neutralResult), /autoChainDispatchEvidence/, 'neutral-path should not require auto-chain dispatch evidence'); + + const directAutoChainFailResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + autoChainNextAction: 'dispatch_follow_up_subagent', + }); + assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence'); + assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence'); const passInjected = await withPatchedWrapper(buildWrapperScript({ classification: 'long_task', @@ -154,8 +166,9 @@ async function main() { requiredNextAction: 'dispatch_follow_up_subagent', handoff: { mode: 'direct_reply' }, }), async () => runScenario(forceRecall, requestText)); - assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper claims progression without progressEvidence'); - assert.match(failInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fail-path should mention missing progress evidence'); + assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper exposes explicit auto-chain action without dispatch evidence'); + assert.match(failInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook fail-path should mention missing dispatched-action evidence'); + assert.match(failInjected, /requiredEvidence=autoChainDispatchEvidence/, 'hook fail-path should require autoChainDispatchEvidence'); const neutralInjected = await withPatchedWrapper(buildWrapperScript({ classification: 'long_task', @@ -164,11 +177,11 @@ async function main() { needsSubagent: false, needsOwnerDecision: false, silentLaunchOk: false, - requiredNextAction: 'summarize_findings_for_reply', + requiredNextAction: 'summarize findings for reply', handoff: { mode: 'direct_reply' }, }), async () => runScenario(forceRecall, requestText)); - assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not claim progression'); - assert.doesNotMatch(neutralInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook neutral-path should not fail on missing progress evidence without a progression claim'); + assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not expose an explicit auto-chain action'); + assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists'); const originalGateLock = await fs.readFile(gateLockPath, 'utf8'); const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));