fix: stop fabricating auto-chain hook evidence
This commit is contained in:
@@ -245,9 +245,9 @@ function buildAutoChainPlannerInput(gateLockResult: GateLockResult | null, wrapp
|
|||||||
plannerInput.actorStage = "spec_review";
|
plannerInput.actorStage = "spec_review";
|
||||||
plannerInput.requiredNextAction = "request_code_quality_review";
|
plannerInput.requiredNextAction = "request_code_quality_review";
|
||||||
plannerInput.reviewOutcome = "pass";
|
plannerInput.reviewOutcome = "pass";
|
||||||
plannerInput.reviewEvidence = wrapperResult?.reviewEvidence && typeof wrapperResult.reviewEvidence === "object" && !Array.isArray(wrapperResult.reviewEvidence)
|
if (wrapperResult?.reviewEvidence && typeof wrapperResult.reviewEvidence === "object" && !Array.isArray(wrapperResult.reviewEvidence)) {
|
||||||
? wrapperResult.reviewEvidence
|
plannerInput.reviewEvidence = wrapperResult.reviewEvidence;
|
||||||
: { source: "hook_preflight", verdict: "pass" };
|
}
|
||||||
return plannerInput;
|
return plannerInput;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -257,9 +257,18 @@ function buildAutoChainPlannerInput(gateLockResult: GateLockResult | null, wrapp
|
|||||||
plannerInput.blocker = typeof wrapperResult?.silentLaunchReason === "string" && wrapperResult.silentLaunchReason.trim()
|
plannerInput.blocker = typeof wrapperResult?.silentLaunchReason === "string" && wrapperResult.silentLaunchReason.trim()
|
||||||
? wrapperResult.silentLaunchReason.trim()
|
? wrapperResult.silentLaunchReason.trim()
|
||||||
: "hook_preflight_blocker";
|
: "hook_preflight_blocker";
|
||||||
plannerInput.blockerEvidence = wrapperResult?.blockerEvidence && typeof wrapperResult.blockerEvidence === "object" && !Array.isArray(wrapperResult.blockerEvidence)
|
if (wrapperResult?.blockerEvidence && typeof wrapperResult.blockerEvidence === "object" && !Array.isArray(wrapperResult.blockerEvidence)) {
|
||||||
? wrapperResult.blockerEvidence
|
plannerInput.blockerEvidence = wrapperResult.blockerEvidence;
|
||||||
: { source: "hook_preflight", blocker: plannerInput.blocker };
|
}
|
||||||
|
return plannerInput;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (requiredNextAction === "dispatch_spec_review") {
|
||||||
|
plannerInput.actorStage = "implementer_result";
|
||||||
|
plannerInput.requiredNextAction = "request_spec_review";
|
||||||
|
if (wrapperResult?.implementationEvidence && typeof wrapperResult.implementationEvidence === "object" && !Array.isArray(wrapperResult.implementationEvidence)) {
|
||||||
|
plannerInput.executionEvidence = wrapperResult.implementationEvidence;
|
||||||
|
}
|
||||||
return plannerInput;
|
return plannerInput;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -65,6 +65,10 @@ async function main() {
|
|||||||
'I need you to review the behavior, choose the final accept/reject decision,',
|
'I need you to review the behavior, choose the final accept/reject decision,',
|
||||||
'and continue in background with a follow-up later.',
|
'and continue in background with a follow-up later.',
|
||||||
].join(' ');
|
].join(' ');
|
||||||
|
const plannerOnlyRequestText = [
|
||||||
|
'Please inspect the workspace files and verify the hook injection path.',
|
||||||
|
'Summarize the current dry-run planner state for technical inspection only.',
|
||||||
|
].join(' ');
|
||||||
|
|
||||||
const injected = await runScenario(forceRecall, requestText);
|
const injected = await runScenario(forceRecall, requestText);
|
||||||
|
|
||||||
@@ -252,6 +256,79 @@ async function main() {
|
|||||||
assert.match(neutralInjected, /autoChainAllowed=false/, 'hook neutral-path should keep auto-chain disabled');
|
assert.match(neutralInjected, /autoChainAllowed=false/, 'hook neutral-path should keep auto-chain disabled');
|
||||||
assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');
|
assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');
|
||||||
|
|
||||||
|
const specReviewWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
|
classification: 'long_task',
|
||||||
|
silentCandidate: false,
|
||||||
|
needsCheckpoint: false,
|
||||||
|
needsSubagent: false,
|
||||||
|
needsOwnerDecision: false,
|
||||||
|
silentLaunchOk: true,
|
||||||
|
requiredNextAction: 'dispatch_code_quality_review',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_code_quality_review',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
|
taskRecord: { task_name: 'task-spec-review-missing-evidence' },
|
||||||
|
handoff: { mode: 'direct_reply' },
|
||||||
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook spec-review missing-evidence path should block on missing evidence');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /derivedAction=none/, 'hook spec-review missing-evidence path should not derive a dry-run action');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook spec-review missing-evidence path should stay no-dispatch');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook spec-review missing-evidence path should not allow auto-chain');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /reason=review pass evidence missing for code quality review transition/, 'hook spec-review missing-evidence path should mention missing review evidence');
|
||||||
|
assert.match(specReviewWithoutEvidenceInjected, /requiredEvidence=reviewEvidence/, 'hook spec-review missing-evidence path should require reviewEvidence');
|
||||||
|
|
||||||
|
const fixSliceWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
|
classification: 'long_task',
|
||||||
|
silentCandidate: false,
|
||||||
|
needsCheckpoint: false,
|
||||||
|
needsSubagent: false,
|
||||||
|
needsOwnerDecision: false,
|
||||||
|
silentLaunchOk: true,
|
||||||
|
silentLaunchReason: 'review blocked by findings',
|
||||||
|
requiredNextAction: 'dispatch_fix_slice',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_fix_slice',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
|
taskRecord: { task_name: 'task-fix-slice-missing-evidence' },
|
||||||
|
handoff: { mode: 'direct_reply' },
|
||||||
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook fix-slice missing-evidence path should block on missing evidence');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /derivedAction=none/, 'hook fix-slice missing-evidence path should not derive a dry-run action');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook fix-slice missing-evidence path should stay no-dispatch');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook fix-slice missing-evidence path should not allow auto-chain');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /reason=blocker evidence missing for retry\/fix transition/, 'hook fix-slice missing-evidence path should mention missing blocker evidence');
|
||||||
|
assert.match(fixSliceWithoutEvidenceInjected, /requiredEvidence=blockerEvidence/, 'hook fix-slice missing-evidence path should require blockerEvidence');
|
||||||
|
|
||||||
|
const specReviewWithoutImplementationEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
|
classification: 'long_task',
|
||||||
|
silentCandidate: false,
|
||||||
|
needsCheckpoint: false,
|
||||||
|
needsSubagent: false,
|
||||||
|
needsOwnerDecision: false,
|
||||||
|
silentLaunchOk: true,
|
||||||
|
requiredNextAction: 'dispatch_spec_review',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_spec_review',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
|
taskRecord: { task_name: 'task-implementation-missing-evidence' },
|
||||||
|
handoff: { mode: 'direct_reply' },
|
||||||
|
}), async () => runScenario(forceRecall, plannerOnlyRequestText));
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook implementation missing-evidence path should block on missing evidence');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /derivedAction=none/, 'hook implementation missing-evidence path should not derive a dry-run action');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /dispatchMode=no_dispatch/, 'hook implementation missing-evidence path should stay no-dispatch');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /autoChainAllowed=false/, 'hook implementation missing-evidence path should not allow auto-chain');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /reason=implementation evidence missing for review-required next action/, 'hook implementation missing-evidence path should mention missing implementation evidence');
|
||||||
|
assert.match(specReviewWithoutImplementationEvidenceInjected, /requiredEvidence=executionEvidence/, 'hook implementation missing-evidence path should require executionEvidence');
|
||||||
|
|
||||||
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
|
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
|
||||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
|
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
|
||||||
const backupPath = path.join(tempDir, path.basename(gateLockPath));
|
const backupPath = path.join(tempDir, path.basename(gateLockPath));
|
||||||
|
|||||||
@@ -124,6 +124,40 @@ const scenarios = [
|
|||||||
requiredEvidenceIncludes: 'executionEvidence',
|
requiredEvidenceIncludes: 'executionEvidence',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: 'spec review pass without review evidence -> blocked_by_evidence',
|
||||||
|
input: {
|
||||||
|
gateStatus: 'pass',
|
||||||
|
actorStage: 'spec_review',
|
||||||
|
reviewOutcome: 'pass',
|
||||||
|
requiredNextAction: 'request_code_quality_review',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
plannerStatus: 'blocked_by_evidence',
|
||||||
|
derivedAction: 'none',
|
||||||
|
dispatchMode: 'no_dispatch',
|
||||||
|
autoChainAllowed: false,
|
||||||
|
reasonIncludes: 'review pass evidence missing',
|
||||||
|
requiredEvidenceIncludes: 'reviewEvidence',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'fix slice without blocker evidence -> blocked_by_evidence',
|
||||||
|
input: {
|
||||||
|
gateStatus: 'pass',
|
||||||
|
actorStage: 'review_result',
|
||||||
|
blocker: 'hook_preflight_blocker',
|
||||||
|
requiredNextAction: 'fix_review_findings',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
plannerStatus: 'blocked_by_evidence',
|
||||||
|
derivedAction: 'none',
|
||||||
|
dispatchMode: 'no_dispatch',
|
||||||
|
autoChainAllowed: false,
|
||||||
|
reasonIncludes: 'blocker evidence missing',
|
||||||
|
requiredEvidenceIncludes: 'blockerEvidence',
|
||||||
|
},
|
||||||
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
function runPlanner(input) {
|
function runPlanner(input) {
|
||||||
|
|||||||
Reference in New Issue
Block a user