feat: require auto-chain action evidence
This commit is contained in:
@@ -146,11 +146,18 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
"bind_externalized_checkpoint_path_or_abort_silent_launch",
|
"bind_externalized_checkpoint_path_or_abort_silent_launch",
|
||||||
].includes(requiredNextAction),
|
].includes(requiredNextAction),
|
||||||
);
|
);
|
||||||
|
const autoChainNextAction = hasConcreteExecutionEvidence ? requiredNextAction : "";
|
||||||
const executionEvidence = hasConcreteExecutionEvidence
|
const executionEvidence = hasConcreteExecutionEvidence
|
||||||
? {
|
? {
|
||||||
concreteNextAction: requiredNextAction,
|
concreteNextAction: requiredNextAction,
|
||||||
}
|
}
|
||||||
: null;
|
: null;
|
||||||
|
const autoChainDispatchEvidence = progressEvidence && hasConcreteExecutionEvidence
|
||||||
|
? {
|
||||||
|
sessionKey: typeof progressEvidence.sessionKey === "string" ? progressEvidence.sessionKey : "",
|
||||||
|
concreteNextAction: requiredNextAction,
|
||||||
|
}
|
||||||
|
: null;
|
||||||
const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence)
|
const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence)
|
||||||
? "already progressing to the next step in background"
|
? "already progressing to the next step in background"
|
||||||
: "";
|
: "";
|
||||||
@@ -170,6 +177,8 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
|
autoChainNextAction,
|
||||||
|
autoChainDispatchEvidence,
|
||||||
progressionClaim: claimedProgression,
|
progressionClaim: claimedProgression,
|
||||||
claimedProgression: claimedProgression,
|
claimedProgression: claimedProgression,
|
||||||
statusSummary: claimedProgression,
|
statusSummary: claimedProgression,
|
||||||
@@ -274,6 +283,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
|
|||||||
"- ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.",
|
"- ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.",
|
||||||
"- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
|
"- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
|
||||||
"- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
|
"- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
|
||||||
|
"- ENFORCEMENT: If hook input carries autoChainNextAction, it must also carry matching autoChainDispatchEvidence before the gate may pass that auto-chain step.",
|
||||||
];
|
];
|
||||||
|
|
||||||
if (gateLockResult.gateStatus === "fail") {
|
if (gateLockResult.gateStatus === "fail") {
|
||||||
@@ -282,6 +292,7 @@ function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
|
|||||||
lines.push("- HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate.");
|
lines.push("- HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate.");
|
||||||
lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.");
|
lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.");
|
||||||
lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.");
|
lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.");
|
||||||
|
lines.push("- HARD_GATE: If autoChainNextAction is explicit, you must actually dispatch it and surface autoChainDispatchEvidence; otherwise the gate fails.");
|
||||||
lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.");
|
lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -30,6 +30,14 @@ const EVIDENCE_FIELDS = Object.freeze({
|
|||||||
'verificationEvidence',
|
'verificationEvidence',
|
||||||
'checkpointArtifactEvidence',
|
'checkpointArtifactEvidence',
|
||||||
]),
|
]),
|
||||||
|
autoChainNextAction: Object.freeze([
|
||||||
|
'autoChainNextAction',
|
||||||
|
'auto_chain_next_action',
|
||||||
|
]),
|
||||||
|
autoChainDispatchEvidence: Object.freeze([
|
||||||
|
'autoChainDispatchEvidence',
|
||||||
|
'auto_chain_dispatch_evidence',
|
||||||
|
]),
|
||||||
progressEvidence: Object.freeze([
|
progressEvidence: Object.freeze([
|
||||||
'progressEvidence',
|
'progressEvidence',
|
||||||
'progressEvidence.sessionKey',
|
'progressEvidence.sessionKey',
|
||||||
@@ -64,6 +72,11 @@ const GATE_REQUIREMENTS = Object.freeze({
|
|||||||
acceptedFields: EVIDENCE_FIELDS.executionEvidence,
|
acceptedFields: EVIDENCE_FIELDS.executionEvidence,
|
||||||
requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
|
requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
|
||||||
}),
|
}),
|
||||||
|
autoChainDispatchEvidence: Object.freeze({
|
||||||
|
evidenceKey: 'autoChainDispatchEvidence',
|
||||||
|
acceptedFields: EVIDENCE_FIELDS.autoChainDispatchEvidence,
|
||||||
|
requiredValue: 'dispatched-action evidence for the explicit auto-chain next action',
|
||||||
|
}),
|
||||||
progressEvidence: Object.freeze({
|
progressEvidence: Object.freeze({
|
||||||
evidenceKey: 'progressEvidence',
|
evidenceKey: 'progressEvidence',
|
||||||
acceptedFields: EVIDENCE_FIELDS.progressEvidence,
|
acceptedFields: EVIDENCE_FIELDS.progressEvidence,
|
||||||
@@ -180,6 +193,29 @@ function hasExecutionEvidence(input) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the hook input carries an auto-chain next action value.
 *
 * Delegates to `hasAnyNonEmptyString`, passing the field paths registered
 * under `EVIDENCE_FIELDS.autoChainNextAction` (the camelCase and snake_case
 * spellings of the field).
 *
 * @param input Hook input object to inspect.
 * @returns Whatever `hasAnyNonEmptyString` reports for those field paths.
 */
function hasExplicitAutoChainNextAction(input) {
  const actionFieldPaths = EVIDENCE_FIELDS.autoChainNextAction;
  return hasAnyNonEmptyString(input, actionFieldPaths);
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the hook input carries any auto-chain dispatch evidence.
 *
 * Each field path in `EVIDENCE_FIELDS.autoChainDispatchEvidence` is resolved
 * with `getPathValue`; the first path whose value qualifies ends the scan.
 * A value qualifies when it is accepted by `hasNonEmptyString`, is an array
 * with at least one element, or is an object with at least one own
 * enumerable key.
 *
 * @param input Hook input object to inspect.
 * @returns `true` when at least one field path holds a qualifying value.
 */
function hasAutoChainDispatchEvidence(input) {
  for (const fieldPath of EVIDENCE_FIELDS.autoChainDispatchEvidence) {
    const candidate = getPathValue(input, fieldPath);

    let qualifies;
    if (hasNonEmptyString(candidate)) {
      qualifies = true;
    } else if (Array.isArray(candidate)) {
      // Arrays are judged by length only; element contents are not inspected.
      qualifies = candidate.length > 0;
    } else if (candidate && typeof candidate === 'object') {
      // Objects are judged by key count only; their values are not inspected.
      qualifies = Object.keys(candidate).length > 0;
    } else {
      qualifies = false;
    }

    if (qualifies) return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Decides whether the gate must demand auto-chain dispatch evidence.
 *
 * Evidence is required when any field path in
 * `EVIDENCE_FIELDS.autoChainNextAction` holds a string that, once trimmed,
 * looks like an action identifier: two or more alphabetic words joined by
 * underscores (for example `dispatch_follow_up_subagent`). Free text such as
 * `summarize findings for reply` does not match and therefore does not
 * trigger the requirement.
 *
 * Every alias is checked rather than only the first non-empty one, so a prose
 * value in one spelling of the field cannot hide an identifier-shaped action
 * in the other spelling.
 *
 * @param input Hook input object to inspect.
 * @returns `true` when an identifier-shaped auto-chain action is present.
 */
function requiresAutoChainDispatchEvidence(input) {
  if (!hasExplicitAutoChainNextAction(input)) return false;

  // NOTE(review): identifiers containing digits (e.g. `retry_step2`) and
  // single-word actions do not match this pattern — confirm that is intended.
  const actionIdentifierPattern = /^([a-z]+_)+[a-z]+$/i;

  return EVIDENCE_FIELDS.autoChainNextAction.some((fieldPath) => {
    const candidate = getPathValue(input, fieldPath);
    // The typeof guard keeps `.trim()` safe regardless of what the helper accepts.
    return (
      typeof candidate === 'string' &&
      hasNonEmptyString(candidate) &&
      actionIdentifierPattern.test(candidate.trim())
    );
  });
}
|
||||||
|
|
||||||
function hasProgressEvidence(input) {
|
function hasProgressEvidence(input) {
|
||||||
return EVIDENCE_FIELDS.progressEvidence.some((fieldPath) => {
|
return EVIDENCE_FIELDS.progressEvidence.some((fieldPath) => {
|
||||||
const value = getPathValue(input, fieldPath);
|
const value = getPathValue(input, fieldPath);
|
||||||
@@ -249,6 +285,13 @@ function evaluateGate(input) {
|
|||||||
allowedResponseModes.push('evidence_preserving_follow_up');
|
allowedResponseModes.push('evidence_preserving_follow_up');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (requiresAutoChainDispatchEvidence(input) && !hasAutoChainDispatchEvidence(input)) {
|
||||||
|
failed = true;
|
||||||
|
reasons.push('explicit auto-chain next action requires dispatched-action evidence');
|
||||||
|
requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.autoChainDispatchEvidence));
|
||||||
|
allowedResponseModes.push('dispatch_required');
|
||||||
|
}
|
||||||
|
|
||||||
if (!failed) {
|
if (!failed) {
|
||||||
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
|
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
|
||||||
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
|
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
|
||||||
|
|||||||
@@ -106,6 +106,8 @@ async function main() {
|
|||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
claimedExecution: true,
|
claimedExecution: true,
|
||||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' },
|
||||||
progressionClaim: 'already progressing to the next step in background',
|
progressionClaim: 'already progressing to the next step in background',
|
||||||
progressEvidence: { sessionKey: 'task-123' },
|
progressEvidence: { sessionKey: 'task-123' },
|
||||||
});
|
});
|
||||||
@@ -115,20 +117,30 @@ async function main() {
|
|||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
claimedExecution: true,
|
claimedExecution: true,
|
||||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
progressionClaim: 'already progressing to the next step in background',
|
progressionClaim: 'already progressing to the next step in background',
|
||||||
executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
|
executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
|
||||||
});
|
});
|
||||||
assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when progressionClaim lacks progressEvidence');
|
assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when explicit auto-chain action lacks dispatch evidence');
|
||||||
assert.match(JSON.stringify(failResult), /progressEvidence/, 'fail-path should require progressEvidence');
|
assert.match(JSON.stringify(failResult), /autoChainDispatchEvidence/, 'fail-path should require autoChainDispatchEvidence');
|
||||||
|
|
||||||
const neutralResult = evaluateGate({
|
const neutralResult = evaluateGate({
|
||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
claimedExecution: true,
|
claimedExecution: true,
|
||||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
concreteNextAction: 'summarize findings for reply',
|
||||||
executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
|
executionEvidence: { concreteNextAction: 'summarize findings for reply' },
|
||||||
});
|
});
|
||||||
assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no progression claim');
|
assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no explicit auto-chain next action');
|
||||||
assert.doesNotMatch(JSON.stringify(neutralResult), /progressEvidence/, 'neutral-path should not require progressEvidence');
|
assert.doesNotMatch(JSON.stringify(neutralResult), /autoChainDispatchEvidence/, 'neutral-path should not require auto-chain dispatch evidence');
|
||||||
|
|
||||||
|
const directAutoChainFailResult = evaluateGate({
|
||||||
|
classification: 'long_task',
|
||||||
|
claimedExecution: true,
|
||||||
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
});
|
||||||
|
assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
|
||||||
|
assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');
|
||||||
|
|
||||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
@@ -154,8 +166,9 @@ async function main() {
|
|||||||
requiredNextAction: 'dispatch_follow_up_subagent',
|
requiredNextAction: 'dispatch_follow_up_subagent',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, requestText));
|
}), async () => runScenario(forceRecall, requestText));
|
||||||
assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper claims progression without progressEvidence');
|
assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper exposes explicit auto-chain action without dispatch evidence');
|
||||||
assert.match(failInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fail-path should mention missing progress evidence');
|
assert.match(failInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook fail-path should mention missing dispatched-action evidence');
|
||||||
|
assert.match(failInjected, /requiredEvidence=autoChainDispatchEvidence/, 'hook fail-path should require autoChainDispatchEvidence');
|
||||||
|
|
||||||
const neutralInjected = await withPatchedWrapper(buildWrapperScript({
|
const neutralInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
@@ -164,11 +177,11 @@ async function main() {
|
|||||||
needsSubagent: false,
|
needsSubagent: false,
|
||||||
needsOwnerDecision: false,
|
needsOwnerDecision: false,
|
||||||
silentLaunchOk: false,
|
silentLaunchOk: false,
|
||||||
requiredNextAction: 'summarize_findings_for_reply',
|
requiredNextAction: 'summarize findings for reply',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, requestText));
|
}), async () => runScenario(forceRecall, requestText));
|
||||||
assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not claim progression');
|
assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not expose an explicit auto-chain action');
|
||||||
assert.doesNotMatch(neutralInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook neutral-path should not fail on missing progress evidence without a progression claim');
|
assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');
|
||||||
|
|
||||||
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
|
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
|
||||||
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
|
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
|
||||||
|
|||||||
Reference in New Issue
Block a user