feat: inject long-task gate lock into force-recall hook

This commit is contained in:
Eve
2026-04-23 11:48:58 +08:00
parent 91529166fe
commit 2987c496c1

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env node #!/usr/bin/env node
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import fs from 'node:fs/promises'; import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path'; import path from 'node:path';
import { pathToFileURL } from 'node:url'; import { pathToFileURL } from 'node:url';
import { stripTypeScriptTypes } from 'node:module'; import { stripTypeScriptTypes } from 'node:module';
@@ -18,17 +19,11 @@ async function importTsModule(tsPath) {
return import(dataUrl); return import(dataUrl);
} }
async function main() { function escapeRegex(snippet) {
await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]); return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const { default: forceRecall } = await importTsModule(handlerPath); }
assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');
const requestText = [
'Please inspect the workspace files and verify the hook injection path.',
'I need you to review the behavior, choose the final accept/reject decision,',
'and continue in background with a follow-up later.',
].join(' ');
async function runScenario(forceRecall, requestText) {
const event = { const event = {
type: 'message', type: 'message',
action: 'preprocessed', action: 'preprocessed',
@@ -40,9 +35,23 @@ async function main() {
}; };
await forceRecall(event); await forceRecall(event);
const injected = event.context?.bodyForAgent; const injected = event.context?.bodyForAgent;
assert.equal(typeof injected, 'string', 'event.context.bodyForAgent should be a string after handler runs'); assert.equal(typeof injected, 'string', 'event.context.bodyForAgent should be a string after handler runs');
return injected;
}
async function main() {
await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]);
const { default: forceRecall } = await importTsModule(handlerPath);
assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');
const requestText = [
'Please inspect the workspace files and verify the hook injection path.',
'I need you to review the behavior, choose the final accept/reject decision,',
'and continue in background with a follow-up later.',
].join(' ');
const injected = await runScenario(forceRecall, requestText);
const expectedSnippets = [ const expectedSnippets = [
'[LONG_TASK_GOVERNOR_PREFLIGHT]', '[LONG_TASK_GOVERNOR_PREFLIGHT]',
@@ -57,22 +66,49 @@ async function main() {
'reason=silent long-task cannot continue without externalized checkpoint path', 'reason=silent long-task cannot continue without externalized checkpoint path',
'reason=claimed execution requires evidence of a concrete next action', 'reason=claimed execution requires evidence of a concrete next action',
'reason=owner decision flow must end in button-path, not plain text', 'reason=owner decision flow must end in button-path, not plain text',
'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.', 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.', 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',
'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.', 'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
]; ];
for (const snippet of expectedSnippets) { for (const snippet of expectedSnippets) {
assert.match(injected, new RegExp(snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')), `missing snippet: ${snippet}`); assert.match(injected, new RegExp(escapeRegex(snippet)), `missing snippet: ${snippet}`);
}
const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
const backupPath = path.join(tempDir, path.basename(gateLockPath));
await fs.writeFile(backupPath, originalGateLock, 'utf8');
await fs.writeFile(gateLockPath, '#!/usr/bin/env node\nprocess.exit(1);\n', 'utf8');
let degradedInjected;
try {
degradedInjected = await runScenario(forceRecall, requestText);
} finally {
const backup = await fs.readFile(backupPath, 'utf8');
await fs.writeFile(gateLockPath, backup, 'utf8');
await fs.rm(tempDir, { recursive: true, force: true });
}
const degradedExpectedSnippets = [
'[LONG_TASK_GATE_LOCK]',
'gateStatus=degraded',
'gateRequired=unknown',
'HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.',
'HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.',
];
for (const snippet of degradedExpectedSnippets) {
assert.match(degradedInjected, new RegExp(escapeRegex(snippet)), `missing degraded snippet: ${snippet}`);
} }
const summary = { const summary = {
ok: true, ok: true,
checked: expectedSnippets, checked: expectedSnippets,
bodyPreview: injected.split('\n').slice(0, 30), degradedChecked: degradedExpectedSnippets,
bodyPreview: injected.split('\n').slice(0, 35),
}; };
process.stdout.write(JSON.stringify(summary, null, 2) + '\n'); process.stdout.write(JSON.stringify(summary, null, 2) + '\n');