test: require real checkpoint artifact evidence
This commit is contained in:
@@ -4,6 +4,8 @@ import fs from 'node:fs/promises';
|
|||||||
import os from 'node:os';
|
import os from 'node:os';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import { pathToFileURL } from 'node:url';
|
import { pathToFileURL } from 'node:url';
|
||||||
|
import { execFile as execFileCallback } from 'node:child_process';
|
||||||
|
import { promisify } from 'node:util';
|
||||||
import { stripTypeScriptTypes } from 'node:module';
|
import { stripTypeScriptTypes } from 'node:module';
|
||||||
|
|
||||||
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
const __dirname = path.dirname(new URL(import.meta.url).pathname);
|
||||||
@@ -12,6 +14,7 @@ const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
|
|||||||
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
|
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
|
||||||
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
|
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
|
||||||
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');
|
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');
|
||||||
|
const execFileAsync = promisify(execFileCallback);
|
||||||
|
|
||||||
async function importTsModule(tsPath) {
|
async function importTsModule(tsPath) {
|
||||||
const source = await fs.readFile(tsPath, 'utf8');
|
const source = await fs.readFile(tsPath, 'utf8');
|
||||||
@@ -24,12 +27,12 @@ function escapeRegex(snippet) {
|
|||||||
return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runScenario(forceRecall, requestText) {
|
async function runScenario(forceRecall, requestText, workspaceDir = repoRoot) {
|
||||||
const event = {
|
const event = {
|
||||||
type: 'message',
|
type: 'message',
|
||||||
action: 'preprocessed',
|
action: 'preprocessed',
|
||||||
context: {
|
context: {
|
||||||
workspaceDir: repoRoot,
|
workspaceDir,
|
||||||
body: requestText,
|
body: requestText,
|
||||||
bodyForAgent: requestText,
|
bodyForAgent: requestText,
|
||||||
},
|
},
|
||||||
@@ -41,6 +44,28 @@ async function runScenario(forceRecall, requestText) {
|
|||||||
return injected;
|
return injected;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a throwaway workspace directory under the OS temp directory and
 * seeds it with copies of the repository files the force-recall hook tests
 * rely on (wrapper, gate lock, planner, handler, RULEBOOK.md and SOUL.md).
 *
 * The caller owns the returned directory and is responsible for removing it.
 * If seeding fails part-way, the partially built directory is removed here,
 * because the caller never receives its path and could not clean it up.
 *
 * @returns {Promise<string>} Path of the newly created workspace directory.
 * @throws Rethrows the original filesystem error after cleaning up.
 */
async function prepareTempWorkspace() {
  const tempWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-workspace-'));

  try {
    await fs.mkdir(path.join(tempWorkspace, 'scripts'), { recursive: true });
    await fs.mkdir(path.join(tempWorkspace, 'hooks', 'force-recall'), { recursive: true });
    await fs.mkdir(path.join(tempWorkspace, 'docs'), { recursive: true });

    // [source in the repository, destination inside the temp workspace]
    const copies = [
      [wrapperPath, path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs')],
      [gateLockPath, path.join(tempWorkspace, 'scripts', 'long_task_gate_lock.mjs')],
      [plannerPath, path.join(tempWorkspace, 'scripts', 'plan_long_task_auto_chain.mjs')],
      [handlerPath, path.join(tempWorkspace, 'hooks', 'force-recall', 'handler.ts')],
      [path.join(repoRoot, 'docs', 'RULEBOOK.md'), path.join(tempWorkspace, 'docs', 'RULEBOOK.md')],
      [path.join(repoRoot, 'SOUL.md'), path.join(tempWorkspace, 'SOUL.md')],
    ];

    for (const [src, dest] of copies) {
      await fs.copyFile(src, dest);
    }
  } catch (error) {
    // Do not leave a half-populated directory behind in the temp dir.
    await fs.rm(tempWorkspace, { recursive: true, force: true });
    throw error;
  }

  return tempWorkspace;
}
|
||||||
|
|
||||||
async function withPatchedWrapper(tempContent, callback) {
|
async function withPatchedWrapper(tempContent, callback) {
|
||||||
const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
|
const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
|
||||||
await fs.writeFile(wrapperPath, tempContent, 'utf8');
|
await fs.writeFile(wrapperPath, tempContent, 'utf8');
|
||||||
@@ -51,6 +76,29 @@ async function withPatchedWrapper(tempContent, callback) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs `callback` against a temporary workspace whose governor wrapper script
 * has been replaced by a stub that prints `wrapperResult` as JSON.
 *
 * When `wrapperResult.externalizedCheckpointPath` is a non-blank string, a
 * small checkpoint JSON fixture is also written at that path (resolved
 * relative to the workspace) so the file the stub points at actually exists.
 *
 * All setup happens inside the `try` so the workspace is removed even when
 * writing the stub or the fixture fails, not only when the callback fails.
 *
 * @param {object} wrapperResult Object the stub wrapper prints to stdout.
 * @param {(workspaceDir: string) => unknown} callback Invoked with the
 *   workspace path; its (awaited) result is returned.
 * @returns {Promise<unknown>} Whatever `callback` resolves to.
 */
async function withPatchedWrapperWorkspace(wrapperResult, callback) {
  const tempWorkspace = await prepareTempWorkspace();

  try {
    const wrapperScriptPath = path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs');
    await fs.writeFile(wrapperScriptPath, buildWrapperScript(wrapperResult), 'utf8');

    const relativeCheckpointPath = wrapperResult.externalizedCheckpointPath;
    if (typeof relativeCheckpointPath === 'string' && relativeCheckpointPath.trim()) {
      const checkpointPath = path.join(tempWorkspace, relativeCheckpointPath);
      await fs.mkdir(path.dirname(checkpointPath), { recursive: true });
      await fs.writeFile(checkpointPath, JSON.stringify({
        kind: 'long_task_checkpoint',
        currentStep: 'patched-wrapper-test',
        nextStep: 'patched-wrapper-test-next',
        verificationResult: 'checkpoint artifact readable in temp workspace',
      }, null, 2) + '\n', 'utf8');
    }

    return await callback(tempWorkspace);
  } finally {
    await fs.rm(tempWorkspace, { recursive: true, force: true });
  }
}
|
||||||
|
|
||||||
/**
 * Builds the source text of a stub wrapper script: a Node.js program that
 * writes `wrapperResult`, serialized as single-line JSON, followed by a
 * newline, to stdout.
 *
 * @param {unknown} wrapperResult Value embedded into the generated script.
 * @returns {string} Script source, starting with a `node` shebang line.
 */
function buildWrapperScript(wrapperResult) {
  const embeddedPayload = JSON.stringify(wrapperResult);
  const scriptLines = [
    '#!/usr/bin/env node',
    `process.stdout.write(JSON.stringify(${embeddedPayload}, null, 0) + "\\n");`,
    '', // keeps the trailing newline of the generated file
  ];
  return scriptLines.join('\n');
}
|
||||||
@@ -70,7 +118,39 @@ async function main() {
|
|||||||
'Summarize the current dry-run planner state for technical inspection only.',
|
'Summarize the current dry-run planner state for technical inspection only.',
|
||||||
].join(' ');
|
].join(' ');
|
||||||
|
|
||||||
const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.');
|
const checkpointWorkspace = await prepareTempWorkspace();
|
||||||
|
let realWrapperInjected;
|
||||||
|
try {
|
||||||
|
realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.', checkpointWorkspace);
|
||||||
|
const wrapperInputPath = path.join(checkpointWorkspace, 'wrapper-input.json');
|
||||||
|
await fs.writeFile(wrapperInputPath, JSON.stringify({
|
||||||
|
requestText: 'Dispatch a subagent to inspect logs and wait for the result.',
|
||||||
|
hasFilesOrSystems: false,
|
||||||
|
needsWaiting: false,
|
||||||
|
needsSubagent: false,
|
||||||
|
needsOwnerDecision: false,
|
||||||
|
canReplyNow: false,
|
||||||
|
taskName: 'Hook preflight classification',
|
||||||
|
currentStep: 'Classifying request at preprocessed hook',
|
||||||
|
nextStep: 'Carry governor recommendation into prompt context',
|
||||||
|
nextReportCondition: 'At next meaningful milestone',
|
||||||
|
waitingOn: 'none',
|
||||||
|
blocker: 'none',
|
||||||
|
checkpointTrigger: '',
|
||||||
|
externalizedTrigger: '',
|
||||||
|
triggerKind: '',
|
||||||
|
}), 'utf8');
|
||||||
|
const wrapperRaw = await fs.readFile(path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), 'utf8');
|
||||||
|
assert.ok(wrapperRaw.length > 0, 'temp workspace should contain wrapper script');
|
||||||
|
const { stdout: wrapperStdout } = await execFileAsync('node', [path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), '--compact', '--input', wrapperInputPath], { cwd: checkpointWorkspace, encoding: 'utf8' });
|
||||||
|
const wrapperOutput = JSON.parse(wrapperStdout);
|
||||||
|
const checkpointPath = path.join(checkpointWorkspace, wrapperOutput.externalizedCheckpointPath);
|
||||||
|
const checkpointBody = await fs.readFile(checkpointPath, 'utf8');
|
||||||
|
assert.ok(checkpointBody.trim().length > 0, 'real wrapper integration should emit readable checkpoint artifact');
|
||||||
|
assert.doesNotMatch(checkpointBody, /Hook preflight classification/, 'real wrapper artifact should not fall back to taskRecord.task_name');
|
||||||
|
} finally {
|
||||||
|
await fs.rm(checkpointWorkspace, { recursive: true, force: true });
|
||||||
|
}
|
||||||
assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
|
assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
|
||||||
assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
|
assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
|
||||||
assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
|
assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
|
||||||
@@ -203,7 +283,7 @@ async function main() {
|
|||||||
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
|
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
|
||||||
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
|
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
|
||||||
|
|
||||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
const passInjected = await withPatchedWrapperWorkspace({
|
||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
silentCandidate: true,
|
silentCandidate: true,
|
||||||
needsCheckpoint: true,
|
needsCheckpoint: true,
|
||||||
@@ -220,7 +300,7 @@ async function main() {
|
|||||||
progressEvidence: { sessionKey: 'task-123' },
|
progressEvidence: { sessionKey: 'task-123' },
|
||||||
externalizedCheckpointPath: 'checkpoints/task-123.json',
|
externalizedCheckpointPath: 'checkpoints/task-123.json',
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, requestText));
|
}, async (workspaceDir) => runScenario(forceRecall, requestText, workspaceDir));
|
||||||
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
|
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
|
||||||
assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
|
assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
|
||||||
assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');
|
assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
import assert from 'node:assert/strict';
|
import assert from 'node:assert/strict';
|
||||||
|
import fs from 'node:fs';
|
||||||
|
import os from 'node:os';
|
||||||
import { execFileSync, spawnSync } from 'node:child_process';
|
import { execFileSync, spawnSync } from 'node:child_process';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
@@ -124,6 +126,36 @@ function assertErrorCase(name, args, expectedStderr, input) {
|
|||||||
function main() {
|
function main() {
|
||||||
const results = fixtures.map(runFixture);
|
const results = fixtures.map(runFixture);
|
||||||
|
|
||||||
|
const realismWorkspace = fs.mkdtempSync(path.join(os.tmpdir(), 'wrapper-realism-'));
|
||||||
|
try {
|
||||||
|
const realismInput = path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json');
|
||||||
|
const stdout = execFileSync(process.execPath, [wrapperPath, '--compact', '--input', realismInput], {
|
||||||
|
cwd: realismWorkspace,
|
||||||
|
encoding: 'utf8',
|
||||||
|
});
|
||||||
|
const output = JSON.parse(stdout);
|
||||||
|
assert.equal(typeof output.externalizedCheckpointPath, 'string', 'realism: missing externalizedCheckpointPath');
|
||||||
|
assert.ok(output.externalizedCheckpointPath.length > 0, 'realism: empty externalizedCheckpointPath');
|
||||||
|
const artifactPath = path.join(realismWorkspace, output.externalizedCheckpointPath);
|
||||||
|
assert.ok(fs.existsSync(artifactPath), `realism: checkpoint artifact missing at ${artifactPath}`);
|
||||||
|
const artifactBody = fs.readFileSync(artifactPath, 'utf8');
|
||||||
|
assert.ok(artifactBody.trim().length > 0, 'realism: checkpoint artifact should be readable and non-empty');
|
||||||
|
assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'realism: progressEvidence must not include task_name fallback');
|
||||||
|
assert.equal(artifactBody.includes('Wait for delegated log survey'), false, 'realism: checkpoint artifact must not fall back to taskRecord.task_name');
|
||||||
|
results.push({
|
||||||
|
name: 'real checkpoint artifact',
|
||||||
|
output: {
|
||||||
|
classification: output.classification,
|
||||||
|
silentCandidate: output.silentCandidate,
|
||||||
|
silentLaunchOk: output.silentLaunchOk,
|
||||||
|
requiredNextAction: output.requiredNextAction,
|
||||||
|
handoff: output.handoff,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} finally {
|
||||||
|
fs.rmSync(realismWorkspace, { recursive: true, force: true });
|
||||||
|
}
|
||||||
|
|
||||||
assertErrorCase('invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n');
|
assertErrorCase('invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n');
|
||||||
assertErrorCase('missing input value', ['--input'], 'CLI_ERROR: --input requires a value');
|
assertErrorCase('missing input value', ['--input'], 'CLI_ERROR: --input requires a value');
|
||||||
assertErrorCase('unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus');
|
assertErrorCase('unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus');
|
||||||
|
|||||||
Reference in New Issue
Block a user