feat: block progress claims without execution evidence

This commit is contained in:
Eve
2026-04-23 14:36:23 +08:00
parent 2987c496c1
commit 44def4177c
2 changed files with 45 additions and 0 deletions

View File

@@ -109,6 +109,9 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
&& typeof wrapperResult.taskRecord?.task_name === "string"
&& wrapperResult.taskRecord.task_name.trim().length > 0;
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
const claimedProgression = wrapperResult.classification === "long_task"
? "already progressing to the next step in background"
: "";
return {
classification: wrapperResult.classification,
@@ -118,6 +121,14 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
progressionClaim: claimedProgression,
claimedProgression: claimedProgression,
statusSummary: claimedProgression,
toolCallEvidence: "",
dispatchEvidence: "",
fileChangeEvidence: "",
verificationEvidence: "",
checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",

View File

@@ -17,6 +17,18 @@ const EVIDENCE_FIELDS = Object.freeze({
'handoff.mode',
'replyClosureMode',
]),
progressionClaim: Object.freeze([
'progressionClaim',
'claimedProgression',
'statusSummary',
]),
executionEvidence: Object.freeze([
'toolCallEvidence',
'dispatchEvidence',
'fileChangeEvidence',
'verificationEvidence',
'checkpointArtifactEvidence',
]),
});
const GATE_REQUIREMENTS = Object.freeze({
@@ -35,6 +47,11 @@ const GATE_REQUIREMENTS = Object.freeze({
acceptedFields: EVIDENCE_FIELDS.buttonPathMode,
requiredValue: 'button_path',
}),
executionEvidence: Object.freeze({
evidenceKey: 'executionEvidence',
acceptedFields: EVIDENCE_FIELDS.executionEvidence,
requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
}),
});
function fail(code, message) {
@@ -136,6 +153,16 @@ function usesButtonPath(input) {
return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
}
/**
 * Reports whether `input` carries a progression claim that is not backed by
 * any execution evidence.
 *
 * A claim counts as present when at least one of the field paths listed in
 * `EVIDENCE_FIELDS.progressionClaim` resolves (via `getPathValue`) to a value
 * accepted by `hasNonEmptyString`.
 *
 * @param input Gate input object whose claim and evidence fields are inspected.
 * @returns `false` when no progression claim is present; otherwise the negation
 *   of `hasAnyNonEmptyString` over the `EVIDENCE_FIELDS.executionEvidence` paths.
 */
function claimsProgressionWithoutEvidence(input) {
  // Resolve every candidate claim field up front, then pick the first usable one.
  const claimCandidates = EVIDENCE_FIELDS.progressionClaim.map(
    (claimPath) => getPathValue(input, claimPath),
  );
  const firstClaim = claimCandidates.find((candidate) => hasNonEmptyString(candidate));

  // Without a claim there is nothing to block; with one, evidence is mandatory.
  return hasNonEmptyString(firstClaim)
    ? !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence)
    : false;
}
function evaluateGate(input) {
const gateRequired = isLongTask(input);
const reasons = [];
@@ -175,6 +202,13 @@ function evaluateGate(input) {
allowedResponseModes.push('button_path');
}
if (claimsProgressionWithoutEvidence(input)) {
failed = true;
reasons.push('claimed progression without concrete execution evidence is forbidden');
requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.executionEvidence));
allowedResponseModes.push('evidence_preserving_follow_up');
}
if (!failed) {
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');