feat: block progress claims without execution evidence
This commit is contained in:
@@ -109,6 +109,9 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
&& typeof wrapperResult.taskRecord?.task_name === "string"
|
&& typeof wrapperResult.taskRecord?.task_name === "string"
|
||||||
&& wrapperResult.taskRecord.task_name.trim().length > 0;
|
&& wrapperResult.taskRecord.task_name.trim().length > 0;
|
||||||
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
|
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
|
||||||
|
const claimedProgression = wrapperResult.classification === "long_task"
|
||||||
|
? "already progressing to the next step in background"
|
||||||
|
: "";
|
||||||
|
|
||||||
return {
|
return {
|
||||||
classification: wrapperResult.classification,
|
classification: wrapperResult.classification,
|
||||||
@@ -118,6 +121,14 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
|
||||||
|
progressionClaim: claimedProgression,
|
||||||
|
claimedProgression: claimedProgression,
|
||||||
|
statusSummary: claimedProgression,
|
||||||
|
toolCallEvidence: "",
|
||||||
|
dispatchEvidence: "",
|
||||||
|
fileChangeEvidence: "",
|
||||||
|
verificationEvidence: "",
|
||||||
|
checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
|
||||||
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
|
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
|
||||||
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
|
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
|
||||||
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
|
||||||
|
|||||||
@@ -17,6 +17,18 @@ const EVIDENCE_FIELDS = Object.freeze({
|
|||||||
'handoff.mode',
|
'handoff.mode',
|
||||||
'replyClosureMode',
|
'replyClosureMode',
|
||||||
]),
|
]),
|
||||||
|
progressionClaim: Object.freeze([
|
||||||
|
'progressionClaim',
|
||||||
|
'claimedProgression',
|
||||||
|
'statusSummary',
|
||||||
|
]),
|
||||||
|
executionEvidence: Object.freeze([
|
||||||
|
'toolCallEvidence',
|
||||||
|
'dispatchEvidence',
|
||||||
|
'fileChangeEvidence',
|
||||||
|
'verificationEvidence',
|
||||||
|
'checkpointArtifactEvidence',
|
||||||
|
]),
|
||||||
});
|
});
|
||||||
|
|
||||||
const GATE_REQUIREMENTS = Object.freeze({
|
const GATE_REQUIREMENTS = Object.freeze({
|
||||||
@@ -35,6 +47,11 @@ const GATE_REQUIREMENTS = Object.freeze({
|
|||||||
acceptedFields: EVIDENCE_FIELDS.buttonPathMode,
|
acceptedFields: EVIDENCE_FIELDS.buttonPathMode,
|
||||||
requiredValue: 'button_path',
|
requiredValue: 'button_path',
|
||||||
}),
|
}),
|
||||||
|
executionEvidence: Object.freeze({
|
||||||
|
evidenceKey: 'executionEvidence',
|
||||||
|
acceptedFields: EVIDENCE_FIELDS.executionEvidence,
|
||||||
|
requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
|
||||||
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
function fail(code, message) {
|
function fail(code, message) {
|
||||||
@@ -136,6 +153,16 @@ function usesButtonPath(input) {
|
|||||||
return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
|
return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether the input asserts progress without backing it up.
 *
 * Every field path listed in `EVIDENCE_FIELDS.progressionClaim` is read from
 * `input`; if none of them yields a value accepted by `hasNonEmptyString`,
 * there is no claim to police and the result is `false`. Otherwise the result
 * is the negation of `hasAnyNonEmptyString` over the
 * `EVIDENCE_FIELDS.executionEvidence` field paths.
 *
 * NOTE(review): `hasAnyNonEmptyString` is defined outside this view —
 * presumably it resolves each listed path on `input`; confirm.
 *
 * @param input Gate input object to inspect.
 * @returns `true` when a progression claim is present and no execution
 *   evidence field is populated; `false` otherwise.
 */
function claimsProgressionWithoutEvidence(input) {
  // Resolve every claim field up front, exactly as many lookups as before.
  const claimCandidates = EVIDENCE_FIELDS.progressionClaim.map((fieldPath) => getPathValue(input, fieldPath));
  const hasClaim = claimCandidates.some((candidate) => hasNonEmptyString(candidate));

  // The evidence lookup only runs when a claim was actually made.
  return hasClaim && !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence);
}
|
||||||
|
|
||||||
function evaluateGate(input) {
|
function evaluateGate(input) {
|
||||||
const gateRequired = isLongTask(input);
|
const gateRequired = isLongTask(input);
|
||||||
const reasons = [];
|
const reasons = [];
|
||||||
@@ -175,6 +202,13 @@ function evaluateGate(input) {
|
|||||||
allowedResponseModes.push('button_path');
|
allowedResponseModes.push('button_path');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (claimsProgressionWithoutEvidence(input)) {
|
||||||
|
failed = true;
|
||||||
|
reasons.push('claimed progression without concrete execution evidence is forbidden');
|
||||||
|
requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.executionEvidence));
|
||||||
|
allowedResponseModes.push('evidence_preserving_follow_up');
|
||||||
|
}
|
||||||
|
|
||||||
if (!failed) {
|
if (!failed) {
|
||||||
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
|
reasons.push('required long-task gate evidence is present or no gated condition was triggered');
|
||||||
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
|
allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
|
||||||
|
|||||||
Reference in New Issue
Block a user