fix: tighten auto-chain dispatch evidence
This commit is contained in:
@@ -107,7 +107,11 @@ async function main() {
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
-autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' },
|
||||
+autoChainDispatchEvidence: {
|
||||
+action: 'dispatch_follow_up_subagent',
|
||||
+dispatched: true,
|
||||
+event: 'dispatch',
|
||||
+},
|
||||
progressionClaim: 'already progressing to the next step in background',
|
||||
progressEvidence: { sessionKey: 'task-123' },
|
||||
});
|
||||
@@ -142,6 +146,43 @@ async function main() {
|
||||
assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
|
||||
assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');
|
||||
|
||||
const mismatchedDispatchEvidenceResult = evaluateGate({
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainDispatchEvidence: {
|
||||
action: 'dispatch_other_subagent',
|
||||
dispatched: true,
|
||||
event: 'dispatch',
|
||||
},
|
||||
});
|
||||
assert.equal(mismatchedDispatchEvidenceResult.gateStatus, 'fail', 'mismatched dispatch evidence should fail');
|
||||
assert.match(JSON.stringify(mismatchedDispatchEvidenceResult), /autoChainDispatchEvidence/, 'mismatched dispatch evidence should still require matching autoChainDispatchEvidence');
|
||||
|
||||
const fakeCheckpointDispatchEvidenceResult = evaluateGate({
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainDispatchEvidence: {
|
||||
sessionKey: 'task-123',
|
||||
checkpointPath: 'checkpoints/task-123.json',
|
||||
},
|
||||
});
|
||||
assert.equal(fakeCheckpointDispatchEvidenceResult.gateStatus, 'fail', 'checkpoint/session-only dispatch evidence should fail');
|
||||
assert.match(JSON.stringify(fakeCheckpointDispatchEvidenceResult), /explicit auto-chain next action requires dispatched-action evidence/, 'checkpoint/session-only dispatch evidence should be rejected as fake dispatch evidence');
|
||||
|
||||
const neutralSnakeCaseResult = evaluateGate({
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'summarize findings for reply',
|
||||
autoChainNextAction: 'checkpoint_session_metadata_only',
|
||||
executionEvidence: { concreteNextAction: 'summarize findings for reply' },
|
||||
});
|
||||
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
|
||||
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
|
||||
|
||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
||||
classification: 'long_task',
|
||||
silentCandidate: true,
|
||||
@@ -151,6 +192,11 @@ async function main() {
|
||||
silentLaunchOk: true,
|
||||
silentLaunchReason: 'checkpoint established',
|
||||
requiredNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainDispatchEvidence: {
|
||||
action: 'dispatch_follow_up_subagent',
|
||||
dispatched: true,
|
||||
event: 'dispatch',
|
||||
},
|
||||
taskRecord: { task_name: 'task-123' },
|
||||
handoff: { mode: 'direct_reply' },
|
||||
}), async () => runScenario(forceRecall, requestText));
|
||||
|
||||
Reference in New Issue
Block a user