From ef990d10b7de5c9f878f36affe3d4504798474f2 Mon Sep 17 00:00:00 2001 From: Eve Date: Fri, 24 Apr 2026 08:21:06 +0800 Subject: [PATCH] fix: add wrapper-backed progress evidence integration path --- hooks/force-recall/handler.ts | 2 +- scripts/long_task_governor_wrapper.mjs | 46 +++++++++++++++++++ .../test_force_recall_long_task_preflight.mjs | 8 ++++ 3 files changed, 55 insertions(+), 1 deletion(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index ea0895a..979d393 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -204,7 +204,7 @@ function buildGateLockInput(wrapperResult: any): Record { return { classification: wrapperResult.classification, silentContinuation: silentCandidate, - claimedExecution: true, + claimedExecution: hasConcreteExecutionEvidence || (silentCandidate && wrapperResult.silentLaunchOk !== true), needsOwnerDecision, nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "", requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "", diff --git a/scripts/long_task_governor_wrapper.mjs b/scripts/long_task_governor_wrapper.mjs index 2aff442..42430e1 100644 --- a/scripts/long_task_governor_wrapper.mjs +++ b/scripts/long_task_governor_wrapper.mjs @@ -72,6 +72,15 @@ function inferFromRequestText(input) { if (!input.needsSubagent && /\bsubagent\b/.test(text)) { inferred.needsSubagent = true; } + if (!input.checkpointTrigger && inferred.needsSubagent) { + inferred.checkpointTrigger = 'when delegated work returns or the next checkpoint fires'; + } + if (!input.externalizedTrigger && inferred.needsSubagent) { + inferred.externalizedTrigger = 'wrapper-derived checkpoint artifact'; + } + if (!input.triggerKind && inferred.needsSubagent) { + inferred.triggerKind = 'artifact'; + } return inferred; } @@ -107,6 +116,39 @@ function bootstrapTaskState(input, classificationResult) { }; } +function toSlug(value) { + return String(value || '') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 48); +} + +function buildExternalizedCheckpointPath(input, classificationResult) { + if (classificationResult.classification !== 'long_task') return ''; + if (!classificationResult.silentCandidate) return ''; + if (!input.externalizedTrigger) return ''; + + const taskSeed = [input.currentStep, input.nextStep, input.waitingOn, input.blocker] + .map((value) => toSlug(value)) + .filter(Boolean) + .join('-'); + const stableSeed = taskSeed || 'long-task'; + + return `checkpoints/${stableSeed}.json`; +} + +function buildProgressEvidence(input, classificationResult, externalizedCheckpointPath) { + if (classificationResult.classification !== 'long_task') return null; + if (!classificationResult.silentCandidate) return null; + if (!externalizedCheckpointPath) return null; + + return { + sessionKey: toSlug([input.currentStep, input.waitingOn, input.nextStep].filter(Boolean).join('-')) || 'long-task-session', + checkpointPath: externalizedCheckpointPath, + }; +} + function validateSilentLaunch(input, classificationResult) { if (!classificationResult.silentCandidate) { return { @@ -154,6 +196,8 @@ function main() { const input = inferFromRequestText(normalizeRequest(raw)); const classificationResult = classify(input); const taskRecord = bootstrapTaskState(input, classificationResult); + const externalizedCheckpointPath = buildExternalizedCheckpointPath(input, classificationResult); + const progressEvidence = buildProgressEvidence(input, classificationResult, externalizedCheckpointPath); const silentLaunch = validateSilentLaunch(input, classificationResult); const handoff = planHandoff(classificationResult); @@ -164,6 +208,8 @@ function main() { needsCheckpoint: classificationResult.needsCheckpoint, needsSubagent: classificationResult.needsSubagent, taskRecord, + progressEvidence, + externalizedCheckpointPath, silentLaunchOk: silentLaunch.ok, silentLaunchReason: silentLaunch.reason, recommendedFallback: silentLaunch.recommendedFallback, diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index 4c00ce0..c57c1cf 100755 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -70,6 +70,14 @@ async function main() { 'Summarize the current dry-run planner state for technical inspection only.', ].join(' '); + const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.'); + assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task'); + assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence'); + assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance'); + assert.doesNotMatch(realWrapperInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'real wrapper integration should not fail for missing progress evidence'); + assert.doesNotMatch(realWrapperInjected, /requiredEvidence=progressEvidence/, 'real wrapper integration should not require synthetic progressEvidence repair'); + assert.doesNotMatch(realWrapperInjected, /task_name/, 'real wrapper integration should not leak taskRecord.task_name fallback into gate/preflight text'); + const injected = await runScenario(forceRecall, requestText); const expectedSnippets = [