From c7a7b4098deaf8cb8bd95304b5b4fd5c76e66e0c Mon Sep 17 00:00:00 2001 From: Eve Date: Fri, 24 Apr 2026 06:53:24 +0800 Subject: [PATCH] fix: stop fabricating auto-chain hook evidence --- hooks/force-recall/handler.ts | 21 +++-- .../test_force_recall_long_task_preflight.mjs | 77 +++++++++++++++++++ scripts/test_plan_long_task_auto_chain.mjs | 34 ++++++++ 3 files changed, 126 insertions(+), 6 deletions(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index cc1bfd7..14ac9ea 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -245,9 +245,9 @@ function buildAutoChainPlannerInput(gateLockResult: GateLockResult | null, wrapp plannerInput.actorStage = "spec_review"; plannerInput.requiredNextAction = "request_code_quality_review"; plannerInput.reviewOutcome = "pass"; - plannerInput.reviewEvidence = wrapperResult?.reviewEvidence && typeof wrapperResult.reviewEvidence === "object" && !Array.isArray(wrapperResult.reviewEvidence) - ? wrapperResult.reviewEvidence - : { source: "hook_preflight", verdict: "pass" }; + if (wrapperResult?.reviewEvidence && typeof wrapperResult.reviewEvidence === "object" && !Array.isArray(wrapperResult.reviewEvidence)) { + plannerInput.reviewEvidence = wrapperResult.reviewEvidence; + } return plannerInput; } @@ -257,9 +257,18 @@ function buildAutoChainPlannerInput(gateLockResult: GateLockResult | null, wrapp plannerInput.blocker = typeof wrapperResult?.silentLaunchReason === "string" && wrapperResult.silentLaunchReason.trim() ? wrapperResult.silentLaunchReason.trim() : "hook_preflight_blocker"; - plannerInput.blockerEvidence = wrapperResult?.blockerEvidence && typeof wrapperResult.blockerEvidence === "object" && !Array.isArray(wrapperResult.blockerEvidence) - ? wrapperResult.blockerEvidence - : { source: "hook_preflight", blocker: plannerInput.blocker }; + if (wrapperResult?.blockerEvidence && typeof wrapperResult.blockerEvidence === "object" && !Array.isArray(wrapperResult.blockerEvidence)) { + plannerInput.blockerEvidence = wrapperResult.blockerEvidence; + } + return plannerInput; + } + + if (requiredNextAction === "dispatch_spec_review") { + plannerInput.actorStage = "implementer_result"; + plannerInput.requiredNextAction = "request_spec_review"; + if (wrapperResult?.implementationEvidence && typeof wrapperResult.implementationEvidence === "object" && !Array.isArray(wrapperResult.implementationEvidence)) { + plannerInput.executionEvidence = wrapperResult.implementationEvidence; + } return plannerInput; } diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index f503029..dfbe967 100755 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -65,6 +65,10 @@ async function main() { 'I need you to review the behavior, choose the final accept/reject decision,', 'and continue in background with a follow-up later.', ].join(' '); + const plannerOnlyRequestText = [ + 'Please inspect the workspace files and verify the hook injection path.', + 'Summarize the current dry-run planner state for technical inspection only.', + ].join(' '); const injected = await runScenario(forceRecall, requestText); @@ -252,6 +256,79 @@ async function main() { assert.match(neutralInjected, /autoChainAllowed=false/, 'hook neutral-path should keep auto-chain disabled'); assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists'); + const specReviewWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: false, + needsCheckpoint: false, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: true, + requiredNextAction: 'dispatch_code_quality_review', + autoChainDispatchEvidence: { + action: 'dispatch_code_quality_review', + dispatched: true, + event: 'dispatch', + }, + taskRecord: { task_name: 'task-spec-review-missing-evidence' }, + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, plannerOnlyRequestText)); + assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block'); + assert.match(specReviewWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook spec-review missing-evidence path should block on missing evidence'); + assert.match(specReviewWithoutEvidenceInjected, /derivedAction=none/, 'hook spec-review missing-evidence path should not derive a dry-run action'); + assert.match(specReviewWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook spec-review missing-evidence path should stay no-dispatch'); + assert.match(specReviewWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook spec-review missing-evidence path should not allow auto-chain'); + assert.match(specReviewWithoutEvidenceInjected, /reason=review pass evidence missing for code quality review transition/, 'hook spec-review missing-evidence path should mention missing review evidence'); + assert.match(specReviewWithoutEvidenceInjected, /requiredEvidence=reviewEvidence/, 'hook spec-review missing-evidence path should require reviewEvidence'); + + const fixSliceWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: false, + needsCheckpoint: false, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: true, + silentLaunchReason: 'review blocked by findings', + requiredNextAction: 'dispatch_fix_slice', + autoChainDispatchEvidence: { + action: 'dispatch_fix_slice', + dispatched: true, + event: 'dispatch', + }, + taskRecord: { task_name: 'task-fix-slice-missing-evidence' }, + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, plannerOnlyRequestText)); + assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block'); + assert.match(fixSliceWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook fix-slice missing-evidence path should block on missing evidence'); + assert.match(fixSliceWithoutEvidenceInjected, /derivedAction=none/, 'hook fix-slice missing-evidence path should not derive a dry-run action'); + assert.match(fixSliceWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook fix-slice missing-evidence path should stay no-dispatch'); + assert.match(fixSliceWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook fix-slice missing-evidence path should not allow auto-chain'); + assert.match(fixSliceWithoutEvidenceInjected, /reason=blocker evidence missing for retry\/fix transition/, 'hook fix-slice missing-evidence path should mention missing blocker evidence'); + assert.match(fixSliceWithoutEvidenceInjected, /requiredEvidence=blockerEvidence/, 'hook fix-slice missing-evidence path should require blockerEvidence'); + + const specReviewWithoutImplementationEvidenceInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: false, + needsCheckpoint: false, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: true, + requiredNextAction: 'dispatch_spec_review', + autoChainDispatchEvidence: { + action: 'dispatch_spec_review', + dispatched: true, + event: 'dispatch', + }, + taskRecord: { task_name: 'task-implementation-missing-evidence' }, + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, plannerOnlyRequestText)); + assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook implementation missing-evidence path should block on missing evidence'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /derivedAction=none/, 'hook implementation missing-evidence path should not derive a dry-run action'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /dispatchMode=no_dispatch/, 'hook implementation missing-evidence path should stay no-dispatch'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /autoChainAllowed=false/, 'hook implementation missing-evidence path should not allow auto-chain'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /reason=implementation evidence missing for review-required next action/, 'hook implementation missing-evidence path should mention missing implementation evidence'); + assert.match(specReviewWithoutImplementationEvidenceInjected, /requiredEvidence=executionEvidence/, 'hook implementation missing-evidence path should require executionEvidence'); + const originalGateLock = await fs.readFile(gateLockPath, 'utf8'); const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-')); const backupPath = path.join(tempDir, path.basename(gateLockPath)); diff --git a/scripts/test_plan_long_task_auto_chain.mjs b/scripts/test_plan_long_task_auto_chain.mjs index fbf1d05..5ae5f8c 100644 --- a/scripts/test_plan_long_task_auto_chain.mjs +++ b/scripts/test_plan_long_task_auto_chain.mjs @@ -124,6 +124,40 @@ const scenarios = [ requiredEvidenceIncludes: 'executionEvidence', }, }, + { + name: 'spec review pass without review evidence -> blocked_by_evidence', + input: { + gateStatus: 'pass', + actorStage: 'spec_review', + reviewOutcome: 'pass', + requiredNextAction: 'request_code_quality_review', + }, + expected: { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'review pass evidence missing', + requiredEvidenceIncludes: 'reviewEvidence', + }, + }, + { + name: 'fix slice without blocker evidence -> blocked_by_evidence', + input: { + gateStatus: 'pass', + actorStage: 'review_result', + blocker: 'hook_preflight_blocker', + requiredNextAction: 'fix_review_findings', + }, + expected: { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'blocker evidence missing', + requiredEvidenceIncludes: 'blockerEvidence', + }, + }, ]; function runPlanner(input) {