feat: block progress claims without execution evidence

This commit is contained in:
Eve
2026-04-23 14:49:15 +08:00
parent 44def4177c
commit 17449fa519
3 changed files with 20 additions and 1 deletion

View File

@@ -23,6 +23,7 @@ const EVIDENCE_FIELDS = Object.freeze({
'statusSummary',
]),
executionEvidence: Object.freeze([
'executionEvidence',
'toolCallEvidence',
'dispatchEvidence',
'fileChangeEvidence',
@@ -153,6 +154,16 @@ function usesButtonPath(input) {
return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
}
/**
 * Reports whether `input` carries any execution evidence.
 *
 * Each path listed in `EVIDENCE_FIELDS.executionEvidence` is resolved with
 * `getPathValue`. A resolved value counts as evidence when it is:
 *   - accepted by `hasNonEmptyString`,
 *   - an array with at least one element, or
 *   - a non-null object with at least one own enumerable key.
 * Any other value (numbers, booleans, null, undefined, empty containers)
 * does not count.
 *
 * @param {*} input - Payload to inspect; passed straight to `getPathValue`.
 * @returns {boolean} `true` as soon as one path yields evidence, else `false`.
 */
function hasExecutionEvidence(input) {
  for (const evidencePath of EVIDENCE_FIELDS.executionEvidence) {
    const candidate = getPathValue(input, evidencePath);

    if (hasNonEmptyString(candidate)) {
      return true;
    }

    if (Array.isArray(candidate)) {
      // An empty array is not evidence; move on to the next path.
      if (candidate.length > 0) {
        return true;
      }
      continue;
    }

    // `typeof null === 'object'`, so null is excluded explicitly.
    const isPopulatedObject =
      candidate !== null &&
      typeof candidate === 'object' &&
      Object.keys(candidate).length > 0;
    if (isPopulatedObject) {
      return true;
    }
  }

  return false;
}
function claimsProgressionWithoutEvidence(input) {
const progressionClaim = EVIDENCE_FIELDS.progressionClaim
.map((fieldPath) => getPathValue(input, fieldPath))
@@ -160,7 +171,7 @@ function claimsProgressionWithoutEvidence(input) {
if (!hasNonEmptyString(progressionClaim)) return false;
return !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence);
return !hasExecutionEvidence(input);
}
function evaluateGate(input) {

View File

@@ -66,6 +66,8 @@ async function main() {
'reason=silent long-task cannot continue without externalized checkpoint path',
'reason=claimed execution requires evidence of a concrete next action',
'reason=owner decision flow must end in button-path, not plain text',
'reason=claimed progression without concrete execution evidence is forbidden',
'requiredEvidence=executionEvidence',
'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',