test: require real checkpoint artifact evidence
This commit is contained in:
@@ -4,6 +4,8 @@ import fs from 'node:fs/promises';
|
||||
import os from 'node:os';
|
||||
import path from 'node:path';
|
||||
import { pathToFileURL } from 'node:url';
|
||||
import { execFile as execFileCallback } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import { stripTypeScriptTypes } from 'node:module';
|
||||
|
||||
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
||||
@@ -12,6 +14,7 @@ const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
|
||||
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
|
||||
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
|
||||
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');
|
||||
const execFileAsync = promisify(execFileCallback);
|
||||
|
||||
async function importTsModule(tsPath) {
|
||||
const source = await fs.readFile(tsPath, 'utf8');
|
||||
@@ -24,12 +27,12 @@ function escapeRegex(snippet) {
|
||||
return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
async function runScenario(forceRecall, requestText) {
|
||||
async function runScenario(forceRecall, requestText, workspaceDir = repoRoot) {
|
||||
const event = {
|
||||
type: 'message',
|
||||
action: 'preprocessed',
|
||||
context: {
|
||||
workspaceDir: repoRoot,
|
||||
workspaceDir,
|
||||
body: requestText,
|
||||
bodyForAgent: requestText,
|
||||
},
|
||||
@@ -41,6 +44,28 @@ async function runScenario(forceRecall, requestText) {
|
||||
return injected;
|
||||
}
|
||||
|
||||
/**
 * Creates a throwaway workspace under the OS temp directory and copies the
 * wrapper, gate-lock and planner scripts, the force-recall handler,
 * docs/RULEBOOK.md and SOUL.md into it, keeping their repository-relative
 * layout.
 *
 * The caller owns the returned directory and is responsible for removing it.
 * If any step of the setup fails, the partially populated directory is removed
 * before the error is rethrown, so a failed setup does not leave temp
 * directories behind.
 *
 * @returns {Promise<string>} Absolute path of the populated temp workspace.
 * @throws Rethrows any filesystem error raised while creating directories or
 *   copying files.
 */
async function prepareTempWorkspace() {
  const tempWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-workspace-'));

  // [source path in the repository, destination relative to the workspace root]
  const copies = [
    [wrapperPath, path.join('scripts', 'long_task_governor_wrapper.mjs')],
    [gateLockPath, path.join('scripts', 'long_task_gate_lock.mjs')],
    [plannerPath, path.join('scripts', 'plan_long_task_auto_chain.mjs')],
    [handlerPath, path.join('hooks', 'force-recall', 'handler.ts')],
    [path.join(repoRoot, 'docs', 'RULEBOOK.md'), path.join('docs', 'RULEBOOK.md')],
    [path.join(repoRoot, 'SOUL.md'), 'SOUL.md'],
  ];

  try {
    // Sequential on purpose: if one copy fails, no other copy is still in
    // flight while the cleanup below removes the directory.
    for (const [src, relativeDest] of copies) {
      const dest = path.join(tempWorkspace, relativeDest);
      await fs.mkdir(path.dirname(dest), { recursive: true });
      await fs.copyFile(src, dest);
    }
  } catch (error) {
    // The caller never receives the path on failure, so clean up here.
    await fs.rm(tempWorkspace, { recursive: true, force: true });
    throw error;
  }

  return tempWorkspace;
}
|
||||
|
||||
async function withPatchedWrapper(tempContent, callback) {
|
||||
const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
|
||||
await fs.writeFile(wrapperPath, tempContent, 'utf8');
|
||||
@@ -51,6 +76,29 @@ async function withPatchedWrapper(tempContent, callback) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs `callback` against a temp workspace whose governor wrapper script has
 * been replaced by a stub that prints `wrapperResult`.
 *
 * When `wrapperResult.externalizedCheckpointPath` is a non-blank string, a
 * checkpoint JSON file is also written at that path (resolved relative to the
 * workspace) so the artifact the stub points at exists on disk.
 *
 * The workspace is removed once the callback settles. The removal also runs
 * when patching the wrapper or writing the checkpoint fails, so a failed setup
 * does not leak the temp directory.
 *
 * @param {object} wrapperResult Payload the stub wrapper script should emit.
 * @param {(workspaceDir: string) => unknown} callback Invoked with the
 *   workspace path; its (awaited) result is returned.
 * @returns {Promise<unknown>} Whatever `callback` resolves to.
 */
async function withPatchedWrapperWorkspace(wrapperResult, callback) {
  const tempWorkspace = await prepareTempWorkspace();

  try {
    const wrapperScriptPath = path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs');
    await fs.writeFile(wrapperScriptPath, buildWrapperScript(wrapperResult), 'utf8');

    const { externalizedCheckpointPath } = wrapperResult;
    if (typeof externalizedCheckpointPath === 'string' && externalizedCheckpointPath.trim()) {
      const checkpointPath = path.join(tempWorkspace, externalizedCheckpointPath);
      const checkpoint = {
        kind: 'long_task_checkpoint',
        currentStep: 'patched-wrapper-test',
        nextStep: 'patched-wrapper-test-next',
        verificationResult: 'checkpoint artifact readable in temp workspace',
      };
      await fs.mkdir(path.dirname(checkpointPath), { recursive: true });
      await fs.writeFile(checkpointPath, `${JSON.stringify(checkpoint, null, 2)}\n`, 'utf8');
    }

    return await callback(tempWorkspace);
  } finally {
    await fs.rm(tempWorkspace, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Produces the source of a stub Node script that writes `wrapperResult` as a
 * single line of compact JSON (followed by a newline) to stdout.
 *
 * The payload is serialized once, here, and embedded in the script as a string
 * literal. Embedding it as an object literal and re-serializing at runtime
 * would treat an own `"__proto__"` key as a prototype assignment and drop it
 * from the emitted JSON.
 *
 * @param {unknown} wrapperResult JSON-serializable payload for the stub to print.
 * @returns {string} Script source beginning with a `node` shebang line.
 */
function buildWrapperScript(wrapperResult) {
  const payloadLine = `${JSON.stringify(wrapperResult)}\n`;
  // JSON.stringify of a string yields a valid JavaScript string literal.
  return `#!/usr/bin/env node\nprocess.stdout.write(${JSON.stringify(payloadLine)});\n`;
}
|
||||
@@ -70,7 +118,39 @@ async function main() {
|
||||
'Summarize the current dry-run planner state for technical inspection only.',
|
||||
].join(' ');
|
||||
|
||||
const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.');
|
||||
const checkpointWorkspace = await prepareTempWorkspace();
|
||||
let realWrapperInjected;
|
||||
try {
|
||||
realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.', checkpointWorkspace);
|
||||
const wrapperInputPath = path.join(checkpointWorkspace, 'wrapper-input.json');
|
||||
await fs.writeFile(wrapperInputPath, JSON.stringify({
|
||||
requestText: 'Dispatch a subagent to inspect logs and wait for the result.',
|
||||
hasFilesOrSystems: false,
|
||||
needsWaiting: false,
|
||||
needsSubagent: false,
|
||||
needsOwnerDecision: false,
|
||||
canReplyNow: false,
|
||||
taskName: 'Hook preflight classification',
|
||||
currentStep: 'Classifying request at preprocessed hook',
|
||||
nextStep: 'Carry governor recommendation into prompt context',
|
||||
nextReportCondition: 'At next meaningful milestone',
|
||||
waitingOn: 'none',
|
||||
blocker: 'none',
|
||||
checkpointTrigger: '',
|
||||
externalizedTrigger: '',
|
||||
triggerKind: '',
|
||||
}), 'utf8');
|
||||
const wrapperRaw = await fs.readFile(path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), 'utf8');
|
||||
assert.ok(wrapperRaw.length > 0, 'temp workspace should contain wrapper script');
|
||||
const { stdout: wrapperStdout } = await execFileAsync('node', [path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), '--compact', '--input', wrapperInputPath], { cwd: checkpointWorkspace, encoding: 'utf8' });
|
||||
const wrapperOutput = JSON.parse(wrapperStdout);
|
||||
const checkpointPath = path.join(checkpointWorkspace, wrapperOutput.externalizedCheckpointPath);
|
||||
const checkpointBody = await fs.readFile(checkpointPath, 'utf8');
|
||||
assert.ok(checkpointBody.trim().length > 0, 'real wrapper integration should emit readable checkpoint artifact');
|
||||
assert.doesNotMatch(checkpointBody, /Hook preflight classification/, 'real wrapper artifact should not fall back to taskRecord.task_name');
|
||||
} finally {
|
||||
await fs.rm(checkpointWorkspace, { recursive: true, force: true });
|
||||
}
|
||||
assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
|
||||
assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
|
||||
assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
|
||||
@@ -203,7 +283,7 @@ async function main() {
|
||||
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
|
||||
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
|
||||
|
||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
||||
const passInjected = await withPatchedWrapperWorkspace({
|
||||
classification: 'long_task',
|
||||
silentCandidate: true,
|
||||
needsCheckpoint: true,
|
||||
@@ -220,7 +300,7 @@ async function main() {
|
||||
progressEvidence: { sessionKey: 'task-123' },
|
||||
externalizedCheckpointPath: 'checkpoints/task-123.json',
|
||||
handoff: { mode: 'direct_reply' },
|
||||
}), async () => runScenario(forceRecall, requestText));
|
||||
}, async (workspaceDir) => runScenario(forceRecall, requestText, workspaceDir));
|
||||
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
|
||||
assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
|
||||
assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');
|
||||
|
||||
Reference in New Issue
Block a user