feat: inject long-task gate lock into force-recall hook

2026-04-23 11:48:58 +08:00
parent 91529166fe
commit 2987c496c1
1 changed file with 52 additions and 16 deletions
--- a/scripts/test_force_recall_long_task_preflight.mjs
+++ b/scripts/test_force_recall_long_task_preflight.mjs
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 import assert from 'node:assert/strict';
 import fs from 'node:fs/promises';
+import os from 'node:os';
 import path from 'node:path';
 import { pathToFileURL } from 'node:url';
 import { stripTypeScriptTypes } from 'node:module';
@@ -18,17 +19,11 @@ async function importTsModule(tsPath) {
  return import(dataUrl);
 }

-async function main() {
-  await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]);
-  const { default: forceRecall } = await importTsModule(handlerPath);
-  assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');
-
-  const requestText = [
-    'Please inspect the workspace files and verify the hook injection path.',
-    'I need you to review the behavior, choose the final accept/reject decision,',
-    'and continue in background with a follow-up later.',
-  ].join(' ');
+function escapeRegex(snippet) { // Backslash-escape regex metacharacters so `snippet` can be passed to `new RegExp` and match literally.
+  return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}

+async function runScenario(forceRecall, requestText) {
  const event = {
    type: 'message',
    action: 'preprocessed',
@@ -40,9 +35,23 @@ async function main() {
  };

  await forceRecall(event);
-
  const injected = event.context?.bodyForAgent;
  assert.equal(typeof injected, 'string', 'event.context.bodyForAgent should be a string after handler runs');
+  return injected;
+}
+
+async function main() {
+  await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]);
+  const { default: forceRecall } = await importTsModule(handlerPath);
+  assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');
+
+  const requestText = [
+    'Please inspect the workspace files and verify the hook injection path.',
+    'I need you to review the behavior, choose the final accept/reject decision,',
+    'and continue in background with a follow-up later.',
+  ].join(' ');
+
+  const injected = await runScenario(forceRecall, requestText);

  const expectedSnippets = [
    '[LONG_TASK_GOVERNOR_PREFLIGHT]',
@@ -57,22 +66,49 @@ async function main() {
    'reason=silent long-task cannot continue without externalized checkpoint path',
    'reason=claimed execution requires evidence of a concrete next action',
    'reason=owner decision flow must end in button-path, not plain text',
-    'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
-    'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
    'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
    'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
    'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',
-    'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.',
+    'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
+    'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
  ];

  for (const snippet of expectedSnippets) {
-    assert.match(injected, new RegExp(snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')), `missing snippet: ${snippet}`);
+    assert.match(injected, new RegExp(escapeRegex(snippet)), `missing snippet: ${snippet}`);
+  }
+
+  const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
+  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
+  const backupPath = path.join(tempDir, path.basename(gateLockPath));
+  await fs.writeFile(backupPath, originalGateLock, 'utf8');
+  await fs.writeFile(gateLockPath, '#!/usr/bin/env node\nprocess.exit(1);\n', 'utf8');
+
+  let degradedInjected;
+  try {
+    degradedInjected = await runScenario(forceRecall, requestText);
+  } finally {
+    const backup = await fs.readFile(backupPath, 'utf8');
+    await fs.writeFile(gateLockPath, backup, 'utf8');
+    await fs.rm(tempDir, { recursive: true, force: true });
+  }
+
+  const degradedExpectedSnippets = [
+    '[LONG_TASK_GATE_LOCK]',
+    'gateStatus=degraded',
+    'gateRequired=unknown',
+    'HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.',
+    'HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.',
+  ];
+
+  for (const snippet of degradedExpectedSnippets) {
+    assert.match(degradedInjected, new RegExp(escapeRegex(snippet)), `missing degraded snippet: ${snippet}`);
  }

  const summary = {
    ok: true,
    checked: expectedSnippets,
-    bodyPreview: injected.split('\n').slice(0, 30),
+    degradedChecked: degradedExpectedSnippets,
+    bodyPreview: injected.split('\n').slice(0, 35),
  };

  process.stdout.write(JSON.stringify(summary, null, 2) + '\n');