feat: block progress claims without execution evidence

This commit is contained in:
Eve
2026-04-23 14:49:15 +08:00
parent 44def4177c
commit 17449fa519
3 changed files with 20 additions and 1 deletion

View File

@@ -105,6 +105,11 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
"bind_externalized_checkpoint_path_or_abort_silent_launch", "bind_externalized_checkpoint_path_or_abort_silent_launch",
].includes(requiredNextAction), ].includes(requiredNextAction),
); );
const executionEvidence = hasConcreteExecutionEvidence
? {
concreteNextAction: requiredNextAction,
}
: null;
const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true
&& typeof wrapperResult.taskRecord?.task_name === "string" && typeof wrapperResult.taskRecord?.task_name === "string"
&& wrapperResult.taskRecord.task_name.trim().length > 0; && wrapperResult.taskRecord.task_name.trim().length > 0;
@@ -124,6 +129,7 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
progressionClaim: claimedProgression, progressionClaim: claimedProgression,
claimedProgression: claimedProgression, claimedProgression: claimedProgression,
statusSummary: claimedProgression, statusSummary: claimedProgression,
executionEvidence,
toolCallEvidence: "", toolCallEvidence: "",
dispatchEvidence: "", dispatchEvidence: "",
fileChangeEvidence: "", fileChangeEvidence: "",

View File

@@ -23,6 +23,7 @@ const EVIDENCE_FIELDS = Object.freeze({
'statusSummary', 'statusSummary',
]), ]),
executionEvidence: Object.freeze([ executionEvidence: Object.freeze([
'executionEvidence',
'toolCallEvidence', 'toolCallEvidence',
'dispatchEvidence', 'dispatchEvidence',
'fileChangeEvidence', 'fileChangeEvidence',
@@ -153,6 +154,16 @@ function usesButtonPath(input) {
return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path'); return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
} }
/**
 * Reports whether `input` carries any execution evidence.
 *
 * Each field path listed in `EVIDENCE_FIELDS.executionEvidence` is resolved
 * with `getPathValue`, and the first one that qualifies ends the search.
 * A resolved value qualifies when it is:
 *   - accepted by `hasNonEmptyString`,
 *   - an array with at least one element, or
 *   - a non-null object with at least one own enumerable key.
 *
 * Arrays and objects are judged by size only; their contents are not inspected.
 *
 * @param input Gate input object to inspect.
 * @returns `true` if any evidence field qualifies, otherwise `false`.
 */
function hasExecutionEvidence(input) {
  for (const evidencePath of EVIDENCE_FIELDS.executionEvidence) {
    const candidate = getPathValue(input, evidencePath);

    if (hasNonEmptyString(candidate)) {
      return true;
    }

    if (Array.isArray(candidate)) {
      // Arrays never fall through to the object check below.
      if (candidate.length > 0) {
        return true;
      }
      continue;
    }

    // `typeof null` is also 'object', so null is excluded explicitly.
    const isObjectValue = typeof candidate === 'object' && candidate !== null;
    if (isObjectValue && Object.keys(candidate).length > 0) {
      return true;
    }
  }

  return false;
}
function claimsProgressionWithoutEvidence(input) { function claimsProgressionWithoutEvidence(input) {
const progressionClaim = EVIDENCE_FIELDS.progressionClaim const progressionClaim = EVIDENCE_FIELDS.progressionClaim
.map((fieldPath) => getPathValue(input, fieldPath)) .map((fieldPath) => getPathValue(input, fieldPath))
@@ -160,7 +171,7 @@ function claimsProgressionWithoutEvidence(input) {
if (!hasNonEmptyString(progressionClaim)) return false; if (!hasNonEmptyString(progressionClaim)) return false;
return !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence); return !hasExecutionEvidence(input);
} }
function evaluateGate(input) { function evaluateGate(input) {

View File

@@ -66,6 +66,8 @@ async function main() {
'reason=silent long-task cannot continue without externalized checkpoint path', 'reason=silent long-task cannot continue without externalized checkpoint path',
'reason=claimed execution requires evidence of a concrete next action', 'reason=claimed execution requires evidence of a concrete next action',
'reason=owner decision flow must end in button-path, not plain text', 'reason=owner decision flow must end in button-path, not plain text',
'reason=claimed progression without concrete execution evidence is forbidden',
'requiredEvidence=executionEvidence',
'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.', 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.', 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',