fix: add wrapper-backed progress evidence integration path

2026-04-24 08:21:06 +08:00
parent 9f59449b0c
commit ef990d10b7
3 changed files with 55 additions and 1 deletion
--- a/hooks/force-recall/handler.ts
+++ b/hooks/force-recall/handler.ts
@@ -204,7 +204,7 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
  return {
    classification: wrapperResult.classification,
    silentContinuation: silentCandidate,
-    claimedExecution: true,
+    claimedExecution: hasConcreteExecutionEvidence || (silentCandidate && wrapperResult.silentLaunchOk !== true),
    needsOwnerDecision,
    nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "",
    requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
--- a/scripts/long_task_governor_wrapper.mjs
+++ b/scripts/long_task_governor_wrapper.mjs
@@ -72,6 +72,15 @@ function inferFromRequestText(input) {
  if (!input.needsSubagent && /\bsubagent\b/.test(text)) {
    inferred.needsSubagent = true;
  }
+  if (!input.checkpointTrigger && inferred.needsSubagent) {
+    inferred.checkpointTrigger = 'when delegated work returns or the next checkpoint fires';
+  }
+  if (!input.externalizedTrigger && inferred.needsSubagent) {
+    inferred.externalizedTrigger = 'wrapper-derived checkpoint artifact';
+  }
+  if (!input.triggerKind && inferred.needsSubagent) {
+    inferred.triggerKind = 'artifact';
+  }

  return inferred;
 }
@@ -107,6 +116,39 @@ function bootstrapTaskState(input, classificationResult) {
  };
 }

+function toSlug(value) {
+  return String(value || '')
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .slice(0, 48);
+}
+
+function buildExternalizedCheckpointPath(input, classificationResult) {
+  if (classificationResult.classification !== 'long_task') return '';
+  if (!classificationResult.silentCandidate) return '';
+  if (!input.externalizedTrigger) return '';
+
+  const taskSeed = [input.currentStep, input.nextStep, input.waitingOn, input.blocker]
+    .map((value) => toSlug(value))
+    .filter(Boolean)
+    .join('-');
+  const stableSeed = taskSeed || 'long-task';
+
+  return `checkpoints/${stableSeed}.json`;
+}
+
+function buildProgressEvidence(input, classificationResult, externalizedCheckpointPath) {
+  if (classificationResult.classification !== 'long_task') return null;
+  if (!classificationResult.silentCandidate) return null;
+  if (!externalizedCheckpointPath) return null;
+
+  return {
+    sessionKey: toSlug([input.currentStep, input.waitingOn, input.nextStep].filter(Boolean).join('-')) || 'long-task-session',
+    checkpointPath: externalizedCheckpointPath,
+  };
+}
+
 function validateSilentLaunch(input, classificationResult) {
  if (!classificationResult.silentCandidate) {
    return {
@@ -154,6 +196,8 @@ function main() {
  const input = inferFromRequestText(normalizeRequest(raw));
  const classificationResult = classify(input);
  const taskRecord = bootstrapTaskState(input, classificationResult);
+  const externalizedCheckpointPath = buildExternalizedCheckpointPath(input, classificationResult);
+  const progressEvidence = buildProgressEvidence(input, classificationResult, externalizedCheckpointPath);
  const silentLaunch = validateSilentLaunch(input, classificationResult);
  const handoff = planHandoff(classificationResult);

@@ -164,6 +208,8 @@ function main() {
    needsCheckpoint: classificationResult.needsCheckpoint,
    needsSubagent: classificationResult.needsSubagent,
    taskRecord,
+    progressEvidence,
+    externalizedCheckpointPath,
    silentLaunchOk: silentLaunch.ok,
    silentLaunchReason: silentLaunch.reason,
    recommendedFallback: silentLaunch.recommendedFallback,
--- a/scripts/test_force_recall_long_task_preflight.mjs
+++ b/scripts/test_force_recall_long_task_preflight.mjs
@@ -70,6 +70,14 @@ async function main() {
    'Summarize the current dry-run planner state for technical inspection only.',
  ].join(' ');

+  const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.');
+  assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
+  assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
+  assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
+  assert.doesNotMatch(realWrapperInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'real wrapper integration should not fail for missing progress evidence');
+  assert.doesNotMatch(realWrapperInjected, /requiredEvidence=progressEvidence/, 'real wrapper integration should not require synthetic progressEvidence repair');
+  assert.doesNotMatch(realWrapperInjected, /task_name/, 'real wrapper integration should not leak taskRecord.task_name fallback into gate/preflight text');
+
  const injected = await runScenario(forceRecall, requestText);

  const expectedSnippets = [