feat: require auto-chain action evidence
This commit is contained in:
@@ -30,6 +30,14 @@ const EVIDENCE_FIELDS = Object.freeze({
|
||||
'verificationEvidence',
|
||||
'checkpointArtifactEvidence',
|
||||
]),
|
||||
autoChainNextAction: Object.freeze([
|
||||
'autoChainNextAction',
|
||||
'auto_chain_next_action',
|
||||
]),
|
||||
autoChainDispatchEvidence: Object.freeze([
|
||||
'autoChainDispatchEvidence',
|
||||
'auto_chain_dispatch_evidence',
|
||||
]),
|
||||
progressEvidence: Object.freeze([
|
||||
'progressEvidence',
|
||||
'progressEvidence.sessionKey',
|
||||
@@ -64,6 +72,11 @@ const GATE_REQUIREMENTS = Object.freeze({
|
||||
acceptedFields: EVIDENCE_FIELDS.executionEvidence,
|
||||
requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
|
||||
}),
|
||||
autoChainDispatchEvidence: Object.freeze({
|
||||
evidenceKey: 'autoChainDispatchEvidence',
|
||||
acceptedFields: EVIDENCE_FIELDS.autoChainDispatchEvidence,
|
||||
requiredValue: 'dispatched-action evidence for the explicit auto-chain next action',
|
||||
}),
|
||||
progressEvidence: Object.freeze({
|
||||
evidenceKey: 'progressEvidence',
|
||||
acceptedFields: EVIDENCE_FIELDS.progressEvidence,
|
||||
@@ -180,6 +193,29 @@ function hasExecutionEvidence(input) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Reports whether the gate input names an explicit auto-chain next action.
 *
 * The lookup is delegated to hasAnyNonEmptyString over the field paths in
 * EVIDENCE_FIELDS.autoChainNextAction (the camelCase and snake_case
 * spellings of the same field).
 *
 * @param {*} input - Gate input to inspect.
 * @returns {*} The result of hasAnyNonEmptyString for those field paths.
 */
function hasExplicitAutoChainNextAction(input) {
  const candidatePaths = EVIDENCE_FIELDS.autoChainNextAction;
  return hasAnyNonEmptyString(input, candidatePaths);
}
|
||||
|
||||
/**
 * Reports whether the gate input carries auto-chain dispatch evidence.
 *
 * Each field path in EVIDENCE_FIELDS.autoChainDispatchEvidence is resolved
 * with getPathValue; the first path whose value qualifies ends the search.
 * A value qualifies when it is:
 *   - accepted by hasNonEmptyString, or
 *   - an array with at least one element, or
 *   - a non-null object with at least one own enumerable key.
 *
 * @param {*} input - Gate input to inspect.
 * @returns {boolean} true if any accepted field holds qualifying evidence.
 */
function hasAutoChainDispatchEvidence(input) {
  for (const path of EVIDENCE_FIELDS.autoChainDispatchEvidence) {
    const candidate = getPathValue(input, path);

    // String evidence is checked first, before any container checks.
    if (hasNonEmptyString(candidate)) {
      return true;
    }

    let populated = false;
    if (Array.isArray(candidate)) {
      populated = candidate.length !== 0;
    } else if (candidate !== null && typeof candidate === 'object') {
      // Only the presence of keys is checked; key values are not inspected.
      populated = Object.keys(candidate).length !== 0;
    }

    if (populated) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Decides whether the gate must demand dispatched-action evidence.
 *
 * Evidence is required only when the input names an explicit auto-chain
 * next action AND that action, once trimmed, looks like an identifier:
 * two or more runs of ASCII letters joined by single underscores
 * (case-insensitive), e.g. `dispatch_follow_up_subagent`. Free-text actions
 * containing spaces do not match and therefore do not trigger the
 * requirement.
 *
 * @param {*} input - Gate input to inspect.
 * @returns {boolean} true when dispatch evidence must be present.
 */
function requiresAutoChainDispatchEvidence(input) {
  if (!hasExplicitAutoChainNextAction(input)) {
    return false;
  }

  // Resolve every accepted spelling up front, then take the first usable one.
  const resolved = [];
  for (const path of EVIDENCE_FIELDS.autoChainNextAction) {
    resolved.push(getPathValue(input, path));
  }

  let nextAction;
  for (const candidate of resolved) {
    if (hasNonEmptyString(candidate)) {
      nextAction = candidate;
      break;
    }
  }

  // Guards the case where no resolved value passed the check above.
  if (!hasNonEmptyString(nextAction)) {
    return false;
  }

  const identifierShape = /^([a-z]+_)+[a-z]+$/i;
  return identifierShape.test(nextAction.trim());
}
|
||||
|
||||
function hasProgressEvidence(input) {
|
||||
return EVIDENCE_FIELDS.progressEvidence.some((fieldPath) => {
|
||||
const value = getPathValue(input, fieldPath);
|
||||
@@ -249,6 +285,13 @@ function evaluateGate(input) {
|
||||
allowedResponseModes.push('evidence_preserving_follow_up');
|
||||
}
|
||||
|
||||
if (requiresAutoChainDispatchEvidence(input) && !hasAutoChainDispatchEvidence(input)) {
|
||||
failed = true;
|
||||
reasons.push('explicit auto-chain next action requires dispatched-action evidence');
|
||||
requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.autoChainDispatchEvidence));
|
||||
allowedResponseModes.push('dispatch_required');
|
||||
}
|
||||
|
||||
if (!failed) {
|
||||
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
|
||||
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
|
||||
|
||||
@@ -106,6 +106,8 @@ async function main() {
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' },
|
||||
progressionClaim: 'already progressing to the next step in background',
|
||||
progressEvidence: { sessionKey: 'task-123' },
|
||||
});
|
||||
@@ -115,20 +117,30 @@ async function main() {
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
progressionClaim: 'already progressing to the next step in background',
|
||||
executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
|
||||
});
|
||||
assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when progressionClaim lacks progressEvidence');
|
||||
assert.match(JSON.stringify(failResult), /progressEvidence/, 'fail-path should require progressEvidence');
|
||||
assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when explicit auto-chain action lacks dispatch evidence');
|
||||
assert.match(JSON.stringify(failResult), /autoChainDispatchEvidence/, 'fail-path should require autoChainDispatchEvidence');
|
||||
|
||||
const neutralResult = evaluateGate({
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
|
||||
concreteNextAction: 'summarize findings for reply',
|
||||
executionEvidence: { concreteNextAction: 'summarize findings for reply' },
|
||||
});
|
||||
assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no progression claim');
|
||||
assert.doesNotMatch(JSON.stringify(neutralResult), /progressEvidence/, 'neutral-path should not require progressEvidence');
|
||||
assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no explicit auto-chain next action');
|
||||
assert.doesNotMatch(JSON.stringify(neutralResult), /autoChainDispatchEvidence/, 'neutral-path should not require auto-chain dispatch evidence');
|
||||
|
||||
const directAutoChainFailResult = evaluateGate({
|
||||
classification: 'long_task',
|
||||
claimedExecution: true,
|
||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||
});
|
||||
assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
|
||||
assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');
|
||||
|
||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
||||
classification: 'long_task',
|
||||
@@ -154,8 +166,9 @@ async function main() {
|
||||
requiredNextAction: 'dispatch_follow_up_subagent',
|
||||
handoff: { mode: 'direct_reply' },
|
||||
}), async () => runScenario(forceRecall, requestText));
|
||||
assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper claims progression without progressEvidence');
|
||||
assert.match(failInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fail-path should mention missing progress evidence');
|
||||
assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper exposes explicit auto-chain action without dispatch evidence');
|
||||
assert.match(failInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook fail-path should mention missing dispatched-action evidence');
|
||||
assert.match(failInjected, /requiredEvidence=autoChainDispatchEvidence/, 'hook fail-path should require autoChainDispatchEvidence');
|
||||
|
||||
const neutralInjected = await withPatchedWrapper(buildWrapperScript({
|
||||
classification: 'long_task',
|
||||
@@ -164,11 +177,11 @@ async function main() {
|
||||
needsSubagent: false,
|
||||
needsOwnerDecision: false,
|
||||
silentLaunchOk: false,
|
||||
requiredNextAction: 'summarize_findings_for_reply',
|
||||
requiredNextAction: 'summarize findings for reply',
|
||||
handoff: { mode: 'direct_reply' },
|
||||
}), async () => runScenario(forceRecall, requestText));
|
||||
assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not claim progression');
|
||||
assert.doesNotMatch(neutralInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook neutral-path should not fail on missing progress evidence without a progression claim');
|
||||
assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not expose an explicit auto-chain action');
|
||||
assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');
|
||||
|
||||
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
|
||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
|
||||
|
||||
Reference in New Issue
Block a user