From 44def4177ca1ba8949072c18d50d0e507aba0169 Mon Sep 17 00:00:00 2001
From: Eve
Date: Thu, 23 Apr 2026 14:36:23 +0800
Subject: [PATCH] feat: block progress claims without execution evidence

---
Review notes (text between "---" and the first "diff --git" is not part of the commit):
- hooks/force-recall/handler.ts: buildGateLockInput() now returns one fixed claim string under
  progressionClaim, claimedProgression and statusSummary when classification is "long_task"
  (empty string otherwise). toolCallEvidence, dispatchEvidence, fileChangeEvidence and
  verificationEvidence are hard-coded to ""; checkpointArtifactEvidence carries the same trimmed
  task name as externalizedCheckpointPath.
- scripts/long_task_gate_lock.mjs: adds the progressionClaim / executionEvidence field groups,
  the executionEvidence requirement, claimsProgressionWithoutEvidence(), and a failing branch in
  evaluateGate() that uses them.
- NOTE(review): if the handler output is what the gate evaluates (not shown in this patch), a
  long_task without externalized checkpoint evidence carries a claim and no evidence, so the new
  check fails it - confirm that this is intended.
- claimsProgressionWithoutEvidence() was simplified in review: one .some() pass replaces the
  map/find lookup followed by a repeated hasNonEmptyString test. Added-line counts are unchanged;
  the post-image blob abbreviation on the "index" line still reflects the original commit.
- Whitespace was reconstructed (two-space indent assumed) - TODO confirm against the repository
  before applying.

 hooks/force-recall/handler.ts   | 11 +++++++++++
 scripts/long_task_gate_lock.mjs | 34 +++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts
index 825cacb..8a60d2b 100644
--- a/hooks/force-recall/handler.ts
+++ b/hooks/force-recall/handler.ts
@@ -109,6 +109,9 @@ function buildGateLockInput(wrapperResult: any): Record {
     && typeof wrapperResult.taskRecord?.task_name === "string"
     && wrapperResult.taskRecord.task_name.trim().length > 0;
   const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
+  const claimedProgression = wrapperResult.classification === "long_task"
+    ? "already progressing to the next step in background"
+    : "";
 
   return {
     classification: wrapperResult.classification,
@@ -118,6 +121,14 @@ function buildGateLockInput(wrapperResult: any): Record {
     nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
     requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
     concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
+    progressionClaim: claimedProgression,
+    claimedProgression: claimedProgression,
+    statusSummary: claimedProgression,
+    toolCallEvidence: "",
+    dispatchEvidence: "",
+    fileChangeEvidence: "",
+    verificationEvidence: "",
+    checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
     externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "",
     externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
     handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs
index e9e680b..ec18672 100644
--- a/scripts/long_task_gate_lock.mjs
+++ b/scripts/long_task_gate_lock.mjs
@@ -17,6 +17,18 @@ const EVIDENCE_FIELDS = Object.freeze({
     'handoff.mode',
     'replyClosureMode',
   ]),
+  progressionClaim: Object.freeze([
+    'progressionClaim',
+    'claimedProgression',
+    'statusSummary',
+  ]),
+  executionEvidence: Object.freeze([
+    'toolCallEvidence',
+    'dispatchEvidence',
+    'fileChangeEvidence',
+    'verificationEvidence',
+    'checkpointArtifactEvidence',
+  ]),
 });
 
 const GATE_REQUIREMENTS = Object.freeze({
@@ -35,6 +47,11 @@ const GATE_REQUIREMENTS = Object.freeze({
     acceptedFields: EVIDENCE_FIELDS.buttonPathMode,
     requiredValue: 'button_path',
   }),
+  executionEvidence: Object.freeze({
+    evidenceKey: 'executionEvidence',
+    acceptedFields: EVIDENCE_FIELDS.executionEvidence,
+    requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
+  }),
 });
 
 function fail(code, message) {
@@ -136,6 +153,16 @@ function usesButtonPath(input) {
   return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path');
 }
 
+/** Returns true when `input` carries a progression claim but no execution-evidence field passes. */
+function claimsProgressionWithoutEvidence(input) {
+  // A claim exists when any progressionClaim field path resolves to a value passing hasNonEmptyString.
+  const hasProgressionClaim = EVIDENCE_FIELDS.progressionClaim
+    .some((fieldPath) => hasNonEmptyString(getPathValue(input, fieldPath)));
+
+  // With no claim the result is false; otherwise it depends only on the execution-evidence fields.
+  return hasProgressionClaim && !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence);
+}
+
 function evaluateGate(input) {
   const gateRequired = isLongTask(input);
   const reasons = [];
@@ -175,6 +202,13 @@ function evaluateGate(input) {
     allowedResponseModes.push('button_path');
   }
 
+  if (claimsProgressionWithoutEvidence(input)) {
+    failed = true;
+    reasons.push('claimed progression without concrete execution evidence is forbidden');
+    requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.executionEvidence));
+    allowedResponseModes.push('evidence_preserving_follow_up');
+  }
+
   if (!failed) {
     reasons.push('required long-task gate evidence is present or no gated condition was triggered');
     allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');