From d8290b7ba2e3d3510f9427536e5fe25f858e3b99 Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 23 Apr 2026 20:11:49 +0800 Subject: [PATCH] fix: tighten auto-chain dispatch evidence --- hooks/force-recall/handler.ts | 24 +---- scripts/long_task_gate_lock.mjs | 95 +++++++++++++++---- .../test_force_recall_long_task_preflight.mjs | 48 +++++++++- 3 files changed, 128 insertions(+), 39 deletions(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index fb66ea3..d79f9d7 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -98,23 +98,6 @@ function buildProgressEvidence(wrapperResult: any): Record | nu return Object.keys(progressEvidence).length > 0 ? progressEvidence : null; } -function buildAutoChainDispatchEvidence(wrapperResult: any, progressEvidence: Record | null): Record | null { - const taskName = typeof progressEvidence?.sessionKey === "string" - ? progressEvidence.sessionKey.trim() - : ""; - const requiredNextAction = typeof wrapperResult?.requiredNextAction === "string" - ? wrapperResult.requiredNextAction.trim() - : ""; - - if (!requiredNextAction || !taskName) return null; - - return { - action: requiredNextAction, - sessionKey: taskName, - dispatched: true, - }; -} - function shouldClaimProgression(wrapperResult: any, progressEvidence: Record | null): boolean { if (!wrapperResult || wrapperResult.classification !== "long_task") return false; if (progressEvidence && Object.keys(progressEvidence).length > 0) return true; @@ -170,8 +153,11 @@ function buildGateLockInput(wrapperResult: any): Record { } : null; const autoChainDispatchEvidence = hasConcreteExecutionEvidence - ? buildAutoChainDispatchEvidence(wrapperResult, progressEvidence) - : null; + && wrapperResult.autoChainDispatchEvidence + && typeof wrapperResult.autoChainDispatchEvidence === "object" + && !Array.isArray(wrapperResult.autoChainDispatchEvidence) + ? wrapperResult.autoChainDispatchEvidence + : null; const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence) ? "already progressing to the next step in background" : ""; diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs index 1db87b2..61714b1 100644 --- a/scripts/long_task_gate_lock.mjs +++ b/scripts/long_task_gate_lock.mjs @@ -197,28 +197,85 @@ function hasExplicitAutoChainNextAction(input) { return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.autoChainNextAction); } -function hasAutoChainDispatchEvidence(input) { - return EVIDENCE_FIELDS.autoChainDispatchEvidence.some((fieldPath) => { - const value = getPathValue(input, fieldPath); - if (hasNonEmptyString(value)) return true; - if (Array.isArray(value)) return value.length > 0; - if (value && typeof value === 'object') { - if (typeof value.action === 'string' && value.action.trim().length > 0) return true; - if (typeof value.concreteNextAction === 'string' && value.concreteNextAction.trim().length > 0) return true; - if (typeof value.dispatched === 'boolean') return value.dispatched; - return Object.keys(value).length > 0; - } - return false; - }); -} - -function requiresAutoChainDispatchEvidence(input) { - if (!hasExplicitAutoChainNextAction(input)) return false; +function getExplicitAutoChainNextAction(input) { const nextAction = EVIDENCE_FIELDS.autoChainNextAction .map((fieldPath) => getPathValue(input, fieldPath)) .find((value) => hasNonEmptyString(value)); - if (!hasNonEmptyString(nextAction)) return false; - return /^([a-z]+_)+[a-z]+$/i.test(nextAction.trim()); + + return hasNonEmptyString(nextAction) ? nextAction.trim() : ''; +} + +function isExecutableDispatchAction(action) { + if (!hasNonEmptyString(action)) return false; + return /^dispatch_[a-z0-9]+(?:_[a-z0-9]+)*$/i.test(action.trim()); +} + +function getNormalizedDispatchAction(value) { + if (!hasNonEmptyString(value)) return ''; + const normalized = value.trim(); + return isExecutableDispatchAction(normalized) ? normalized : ''; +} + +function getAutoChainDispatchEvidenceMatch(input) { + const nextAction = getExplicitAutoChainNextAction(input); + if (!isExecutableDispatchAction(nextAction)) return { required: false, matched: false }; + + for (const fieldPath of EVIDENCE_FIELDS.autoChainDispatchEvidence) { + const value = getPathValue(input, fieldPath); + if (!value) continue; + + if (hasNonEmptyString(value)) { + const directMatch = getNormalizedDispatchAction(value); + if (directMatch === nextAction) { + return { required: true, matched: true }; + } + continue; + } + + if (typeof value !== 'object' || Array.isArray(value)) continue; + + const candidates = [ + value.action, + value.dispatchedAction, + value.nextAction, + value.autoChainNextAction, + value.requiredNextAction, + value.concreteNextAction, + value.event, + value.type, + value.kind, + value.dispatchType, + value.dispatchAction, + ] + .map((candidate) => getNormalizedDispatchAction(candidate)) + .filter(Boolean); + + const declaresDispatch = [ + value.dispatched === true, + value.wasDispatched === true, + value.didDispatch === true, + value.dispatchEvent === true, + value.event === 'dispatch', + value.type === 'dispatch', + value.kind === 'dispatch', + value.dispatchType === 'dispatch', + ].some(Boolean); + + + if (declaresDispatch && candidates.includes(nextAction)) { + return { required: true, matched: true }; + } + } + + return { required: true, matched: false }; +} + +function hasAutoChainDispatchEvidence(input) { + return getAutoChainDispatchEvidenceMatch(input).matched; +} + +function requiresAutoChainDispatchEvidence(input) { + return getAutoChainDispatchEvidenceMatch(input).required; } function hasProgressEvidence(input) { diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index d0b1603..05775e7 100755 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -107,7 +107,11 @@ async function main() { claimedExecution: true, concreteNextAction: 'dispatch_follow_up_subagent', autoChainNextAction: 'dispatch_follow_up_subagent', - autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' }, + autoChainDispatchEvidence: { + action: 'dispatch_follow_up_subagent', + dispatched: true, + event: 'dispatch', + }, progressionClaim: 'already progressing to the next step in background', progressEvidence: { sessionKey: 'task-123' }, }); @@ -142,6 +146,43 @@ async function main() { assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence'); assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence'); + const mismatchedDispatchEvidenceResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + autoChainNextAction: 'dispatch_follow_up_subagent', + autoChainDispatchEvidence: { + action: 'dispatch_other_subagent', + dispatched: true, + event: 'dispatch', + }, + }); + assert.equal(mismatchedDispatchEvidenceResult.gateStatus, 'fail', 'mismatched dispatch evidence should fail'); + assert.match(JSON.stringify(mismatchedDispatchEvidenceResult), /autoChainDispatchEvidence/, 'mismatched dispatch evidence should still require matching autoChainDispatchEvidence'); + + const fakeCheckpointDispatchEvidenceResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + autoChainNextAction: 'dispatch_follow_up_subagent', + autoChainDispatchEvidence: { + sessionKey: 'task-123', + checkpointPath: 'checkpoints/task-123.json', + }, + }); + assert.equal(fakeCheckpointDispatchEvidenceResult.gateStatus, 'fail', 'checkpoint/session-only dispatch evidence should fail'); + assert.match(JSON.stringify(fakeCheckpointDispatchEvidenceResult), /explicit auto-chain next action requires dispatched-action evidence/, 'checkpoint/session-only dispatch evidence should be rejected as fake dispatch evidence'); + + const neutralSnakeCaseResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'summarize findings for reply', + autoChainNextAction: 'checkpoint_session_metadata_only', + executionEvidence: { concreteNextAction: 'summarize findings for reply' }, + }); + assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement'); + assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement'); + const passInjected = await withPatchedWrapper(buildWrapperScript({ classification: 'long_task', silentCandidate: true, @@ -151,6 +192,11 @@ async function main() { silentLaunchOk: true, silentLaunchReason: 'checkpoint established', requiredNextAction: 'dispatch_follow_up_subagent', + autoChainDispatchEvidence: { + action: 'dispatch_follow_up_subagent', + dispatched: true, + event: 'dispatch', + }, taskRecord: { task_name: 'task-123' }, handoff: { mode: 'direct_reply' }, }), async () => runScenario(forceRecall, requestText));