From 17449fa5197a47e6d70137a6dc3ee11f7ab719d5 Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 23 Apr 2026 14:49:15 +0800 Subject: [PATCH] feat: block progress claims without execution evidence --- hooks/force-recall/handler.ts | 6 ++++++ scripts/long_task_gate_lock.mjs | 13 ++++++++++++- scripts/test_force_recall_long_task_preflight.mjs | 2 ++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index 8a60d2b..f2e1bd6 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -105,6 +105,11 @@ function buildGateLockInput(wrapperResult: any): Record { "bind_externalized_checkpoint_path_or_abort_silent_launch", ].includes(requiredNextAction), ); + const executionEvidence = hasConcreteExecutionEvidence + ? { + concreteNextAction: requiredNextAction, + } + : null; const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true && typeof wrapperResult.taskRecord?.task_name === "string" && wrapperResult.taskRecord.task_name.trim().length > 0; @@ -124,6 +129,7 @@ function buildGateLockInput(wrapperResult: any): Record { progressionClaim: claimedProgression, claimedProgression: claimedProgression, statusSummary: claimedProgression, + executionEvidence, toolCallEvidence: "", dispatchEvidence: "", fileChangeEvidence: "", diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs index ec18672..87edab6 100644 --- a/scripts/long_task_gate_lock.mjs +++ b/scripts/long_task_gate_lock.mjs @@ -23,6 +23,7 @@ const EVIDENCE_FIELDS = Object.freeze({ 'statusSummary', ]), executionEvidence: Object.freeze([ + 'executionEvidence', 'toolCallEvidence', 'dispatchEvidence', 'fileChangeEvidence', @@ -153,6 +154,16 @@ function usesButtonPath(input) { return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path'); } +function hasExecutionEvidence(input) { + return EVIDENCE_FIELDS.executionEvidence.some((fieldPath) => { + const value = getPathValue(input, fieldPath); + if (hasNonEmptyString(value)) return true; + if (Array.isArray(value)) return value.length > 0; + if (value && typeof value === 'object') return Object.keys(value).length > 0; + return false; + }); +} + function claimsProgressionWithoutEvidence(input) { const progressionClaim = EVIDENCE_FIELDS.progressionClaim .map((fieldPath) => getPathValue(input, fieldPath)) @@ -160,7 +171,7 @@ function claimsProgressionWithoutEvidence(input) { if (!hasNonEmptyString(progressionClaim)) return false; - return !hasAnyNonEmptyString(input, EVIDENCE_FIELDS.executionEvidence); + return !hasExecutionEvidence(input); } function evaluateGate(input) { diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index a0476d9..e36aead 100644 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -66,6 +66,8 @@ async function main() { 'reason=silent long-task cannot continue without externalized checkpoint path', 'reason=claimed execution requires evidence of a concrete next action', 'reason=owner decision flow must end in button-path, not plain text', + 'reason=claimed progression without concrete execution evidence is forbidden', + 'requiredEvidence=executionEvidence', 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.', 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',