test: require real checkpoint artifact evidence

This commit is contained in:
Eve
2026-04-24 08:37:57 +08:00
parent ef990d10b7
commit e5db414a00
2 changed files with 117 additions and 5 deletions

View File

@@ -4,6 +4,8 @@ import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { pathToFileURL } from 'node:url';
import { execFile as execFileCallback } from 'node:child_process';
import { promisify } from 'node:util';
import { stripTypeScriptTypes } from 'node:module';
const __dirname = path.dirname(new URL(import.meta.url).pathname);
@@ -12,6 +14,7 @@ const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');
const execFileAsync = promisify(execFileCallback);
async function importTsModule(tsPath) {
const source = await fs.readFile(tsPath, 'utf8');
@@ -24,12 +27,12 @@ function escapeRegex(snippet) {
return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
async function runScenario(forceRecall, requestText) {
async function runScenario(forceRecall, requestText, workspaceDir = repoRoot) {
const event = {
type: 'message',
action: 'preprocessed',
context: {
workspaceDir: repoRoot,
workspaceDir,
body: requestText,
bodyForAgent: requestText,
},
@@ -41,6 +44,28 @@ async function runScenario(forceRecall, requestText) {
return injected;
}
/**
 * Creates a throwaway workspace under the OS temp directory and copies the
 * governor scripts, the force-recall handler, and the rule documents into it,
 * mirroring their layout relative to the repository root.
 *
 * If any part of the setup fails, the partially built directory is removed
 * before the error is rethrown, so a failed setup does not leave temp
 * directories behind.
 *
 * @returns {Promise<string>} Absolute path of the new workspace. The caller
 *   is responsible for removing it when done.
 */
async function prepareTempWorkspace() {
  const tempWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-workspace-'));
  // [source file, destination path relative to the workspace root]
  const copies = [
    [wrapperPath, path.join('scripts', 'long_task_governor_wrapper.mjs')],
    [gateLockPath, path.join('scripts', 'long_task_gate_lock.mjs')],
    [plannerPath, path.join('scripts', 'plan_long_task_auto_chain.mjs')],
    [handlerPath, path.join('hooks', 'force-recall', 'handler.ts')],
    [path.join(repoRoot, 'docs', 'RULEBOOK.md'), path.join('docs', 'RULEBOOK.md')],
    [path.join(repoRoot, 'SOUL.md'), 'SOUL.md'],
  ];
  try {
    for (const [src, relativeDest] of copies) {
      const dest = path.join(tempWorkspace, relativeDest);
      // Derive the parent directory from the destination so the directory
      // list cannot drift from the copy list.
      await fs.mkdir(path.dirname(dest), { recursive: true });
      await fs.copyFile(src, dest);
    }
  } catch (error) {
    // The caller never receives the path on failure, so clean up here.
    await fs.rm(tempWorkspace, { recursive: true, force: true });
    throw error;
  }
  return tempWorkspace;
}
async function withPatchedWrapper(tempContent, callback) {
const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
await fs.writeFile(wrapperPath, tempContent, 'utf8');
@@ -51,6 +76,29 @@ async function withPatchedWrapper(tempContent, callback) {
}
}
/**
 * Runs `callback` against a temporary workspace whose governor wrapper script
 * has been replaced by a stub that prints `wrapperResult` as JSON.
 *
 * When `wrapperResult.externalizedCheckpointPath` is a non-blank string, a
 * stub checkpoint JSON file is written at that path (relative to the
 * workspace root) so the referenced artifact actually exists on disk.
 *
 * The workspace is removed once the callback settles, and also when patching
 * the wrapper or writing the checkpoint fails.
 *
 * @param {object} wrapperResult - Payload the stub wrapper should emit.
 * @param {(workspaceDir: string) => Promise<*>} callback - Receives the
 *   temporary workspace path.
 * @returns {Promise<*>} Whatever `callback` resolves to.
 */
async function withPatchedWrapperWorkspace(wrapperResult, callback) {
  const tempWorkspace = await prepareTempWorkspace();
  try {
    // All workspace mutation happens inside the try block so the finally
    // clause removes the directory even if patching fails part-way.
    const wrapperScriptPath = path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs');
    await fs.writeFile(wrapperScriptPath, buildWrapperScript(wrapperResult), 'utf8');

    const { externalizedCheckpointPath } = wrapperResult;
    if (typeof externalizedCheckpointPath === 'string' && externalizedCheckpointPath.trim()) {
      const checkpointPath = path.join(tempWorkspace, externalizedCheckpointPath);
      await fs.mkdir(path.dirname(checkpointPath), { recursive: true });
      await fs.writeFile(checkpointPath, JSON.stringify({
        kind: 'long_task_checkpoint',
        currentStep: 'patched-wrapper-test',
        nextStep: 'patched-wrapper-test-next',
        verificationResult: 'checkpoint artifact readable in temp workspace',
      }, null, 2) + '\n', 'utf8');
    }

    return await callback(tempWorkspace);
  } finally {
    await fs.rm(tempWorkspace, { recursive: true, force: true });
  }
}
/**
 * Builds the source text of a stub Node script that writes `wrapperResult`
 * to stdout as a single line of JSON.
 *
 * @param {*} wrapperResult - Value to embed in the script as a JSON literal.
 * @returns {string} Script source: a shebang line, one statement, and a
 *   trailing newline.
 */
function buildWrapperScript(wrapperResult) {
  // Serialized once here and embedded as a literal; the generated script
  // re-serializes it at run time.
  const payloadLiteral = JSON.stringify(wrapperResult);
  const scriptLines = [
    '#!/usr/bin/env node',
    `process.stdout.write(JSON.stringify(${payloadLiteral}, null, 0) + "\\n");`,
    '', // produces the trailing newline after join
  ];
  return scriptLines.join('\n');
}
@@ -70,7 +118,39 @@ async function main() {
'Summarize the current dry-run planner state for technical inspection only.',
].join(' ');
const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.');
const checkpointWorkspace = await prepareTempWorkspace();
let realWrapperInjected;
try {
realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.', checkpointWorkspace);
const wrapperInputPath = path.join(checkpointWorkspace, 'wrapper-input.json');
await fs.writeFile(wrapperInputPath, JSON.stringify({
requestText: 'Dispatch a subagent to inspect logs and wait for the result.',
hasFilesOrSystems: false,
needsWaiting: false,
needsSubagent: false,
needsOwnerDecision: false,
canReplyNow: false,
taskName: 'Hook preflight classification',
currentStep: 'Classifying request at preprocessed hook',
nextStep: 'Carry governor recommendation into prompt context',
nextReportCondition: 'At next meaningful milestone',
waitingOn: 'none',
blocker: 'none',
checkpointTrigger: '',
externalizedTrigger: '',
triggerKind: '',
}), 'utf8');
const wrapperRaw = await fs.readFile(path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), 'utf8');
assert.ok(wrapperRaw.length > 0, 'temp workspace should contain wrapper script');
const { stdout: wrapperStdout } = await execFileAsync('node', [path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), '--compact', '--input', wrapperInputPath], { cwd: checkpointWorkspace, encoding: 'utf8' });
const wrapperOutput = JSON.parse(wrapperStdout);
const checkpointPath = path.join(checkpointWorkspace, wrapperOutput.externalizedCheckpointPath);
const checkpointBody = await fs.readFile(checkpointPath, 'utf8');
assert.ok(checkpointBody.trim().length > 0, 'real wrapper integration should emit readable checkpoint artifact');
assert.doesNotMatch(checkpointBody, /Hook preflight classification/, 'real wrapper artifact should not fall back to taskRecord.task_name');
} finally {
await fs.rm(checkpointWorkspace, { recursive: true, force: true });
}
assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
@@ -203,7 +283,7 @@ async function main() {
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
const passInjected = await withPatchedWrapper(buildWrapperScript({
const passInjected = await withPatchedWrapperWorkspace({
classification: 'long_task',
silentCandidate: true,
needsCheckpoint: true,
@@ -220,7 +300,7 @@ async function main() {
progressEvidence: { sessionKey: 'task-123' },
externalizedCheckpointPath: 'checkpoints/task-123.json',
handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, requestText));
}, async (workspaceDir) => runScenario(forceRecall, requestText, workspaceDir));
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');