From 2987c496c17fb505aad431fbcdaf7901b0f2a077 Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 23 Apr 2026 11:48:58 +0800 Subject: [PATCH] feat: inject long-task gate lock into force-recall hook --- .../test_force_recall_long_task_preflight.mjs | 68 ++++++++++++++----- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index b5f93d4..a0476d9 100644 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node import assert from 'node:assert/strict'; import fs from 'node:fs/promises'; +import os from 'node:os'; import path from 'node:path'; import { pathToFileURL } from 'node:url'; import { stripTypeScriptTypes } from 'node:module'; @@ -18,17 +19,11 @@ async function importTsModule(tsPath) { return import(dataUrl); } -async function main() { - await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]); - const { default: forceRecall } = await importTsModule(handlerPath); - assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function'); - - const requestText = [ - 'Please inspect the workspace files and verify the hook injection path.', - 'I need you to review the behavior, choose the final accept/reject decision,', - 'and continue in background with a follow-up later.', - ].join(' '); +function escapeRegex(snippet) { + return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} +async function runScenario(forceRecall, requestText) { const event = { type: 'message', action: 'preprocessed', @@ -40,9 +35,23 @@ async function main() { }; await forceRecall(event); - const injected = event.context?.bodyForAgent; assert.equal(typeof injected, 'string', 'event.context.bodyForAgent should be a string after handler runs'); + return injected; +} + +async function main() { + await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]); + const { default: forceRecall } = await importTsModule(handlerPath); + assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function'); + + const requestText = [ + 'Please inspect the workspace files and verify the hook injection path.', + 'I need you to review the behavior, choose the final accept/reject decision,', + 'and continue in background with a follow-up later.', + ].join(' '); + + const injected = await runScenario(forceRecall, requestText); const expectedSnippets = [ '[LONG_TASK_GOVERNOR_PREFLIGHT]', @@ -57,22 +66,49 @@ async function main() { 'reason=silent long-task cannot continue without externalized checkpoint path', 'reason=claimed execution requires evidence of a concrete next action', 'reason=owner decision flow must end in button-path, not plain text', - 'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.', - 'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.', 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.', 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.', - 'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.', + 'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.', + 'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.', ]; for (const snippet of expectedSnippets) { - assert.match(injected, new RegExp(snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')), `missing snippet: ${snippet}`); + assert.match(injected, new RegExp(escapeRegex(snippet)), `missing snippet: ${snippet}`); + } + + const originalGateLock = await fs.readFile(gateLockPath, 'utf8'); + const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-')); + const backupPath = path.join(tempDir, path.basename(gateLockPath)); + await fs.writeFile(backupPath, originalGateLock, 'utf8'); + await fs.writeFile(gateLockPath, '#!/usr/bin/env node\nprocess.exit(1);\n', 'utf8'); + + let degradedInjected; + try { + degradedInjected = await runScenario(forceRecall, requestText); + } finally { + const backup = await fs.readFile(backupPath, 'utf8'); + await fs.writeFile(gateLockPath, backup, 'utf8'); + await fs.rm(tempDir, { recursive: true, force: true }); + } + + const degradedExpectedSnippets = [ + '[LONG_TASK_GATE_LOCK]', + 'gateStatus=degraded', + 'gateRequired=unknown', + 'HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.', + 'HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.', + ]; + + for (const snippet of degradedExpectedSnippets) { + assert.match(degradedInjected, new RegExp(escapeRegex(snippet)), `missing degraded snippet: ${snippet}`); } const summary = { ok: true, checked: expectedSnippets, - bodyPreview: injected.split('\n').slice(0, 30), + degradedChecked: degradedExpectedSnippets, + bodyPreview: injected.split('\n').slice(0, 35), }; process.stdout.write(JSON.stringify(summary, null, 2) + '\n');