test: require real checkpoint artifact evidence

This commit is contained in:
Eve
2026-04-24 08:37:57 +08:00
parent ef990d10b7
commit e5db414a00
2 changed files with 117 additions and 5 deletions

View File

@@ -4,6 +4,8 @@ import fs from 'node:fs/promises';
import os from 'node:os'; import os from 'node:os';
import path from 'node:path'; import path from 'node:path';
import { pathToFileURL } from 'node:url'; import { pathToFileURL } from 'node:url';
import { execFile as execFileCallback } from 'node:child_process';
import { promisify } from 'node:util';
import { stripTypeScriptTypes } from 'node:module'; import { stripTypeScriptTypes } from 'node:module';
const __dirname = path.dirname(new URL(import.meta.url).pathname); const __dirname = path.dirname(new URL(import.meta.url).pathname);
@@ -12,6 +14,7 @@ const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs'); const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs'); const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs'); const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');
const execFileAsync = promisify(execFileCallback);
async function importTsModule(tsPath) { async function importTsModule(tsPath) {
const source = await fs.readFile(tsPath, 'utf8'); const source = await fs.readFile(tsPath, 'utf8');
@@ -24,12 +27,12 @@ function escapeRegex(snippet) {
return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
} }
async function runScenario(forceRecall, requestText) { async function runScenario(forceRecall, requestText, workspaceDir = repoRoot) {
const event = { const event = {
type: 'message', type: 'message',
action: 'preprocessed', action: 'preprocessed',
context: { context: {
workspaceDir: repoRoot, workspaceDir,
body: requestText, body: requestText,
bodyForAgent: requestText, bodyForAgent: requestText,
}, },
@@ -41,6 +44,28 @@ async function runScenario(forceRecall, requestText) {
return injected; return injected;
} }
/**
 * Create a throwaway workspace under the OS temp directory and copy into it
 * the governor scripts, the force-recall handler, docs/RULEBOOK.md and SOUL.md
 * from the repository, preserving their repo-relative locations.
 *
 * The caller owns the returned directory and is responsible for removing it.
 * If populating the workspace fails part-way (e.g. a source file is missing),
 * the half-built directory is removed before the error is rethrown, because
 * the caller never receives its path and therefore could not clean it up.
 *
 * @returns {Promise<string>} Absolute path of the populated temp workspace.
 * @throws Rethrows any filesystem error raised while creating directories or
 *   copying files.
 */
async function prepareTempWorkspace() {
  const tempWorkspace = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-workspace-'));
  try {
    await fs.mkdir(path.join(tempWorkspace, 'scripts'), { recursive: true });
    await fs.mkdir(path.join(tempWorkspace, 'hooks', 'force-recall'), { recursive: true });
    await fs.mkdir(path.join(tempWorkspace, 'docs'), { recursive: true });

    // [source in the repository, destination inside the temp workspace]
    const copies = [
      [wrapperPath, path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs')],
      [gateLockPath, path.join(tempWorkspace, 'scripts', 'long_task_gate_lock.mjs')],
      [plannerPath, path.join(tempWorkspace, 'scripts', 'plan_long_task_auto_chain.mjs')],
      [handlerPath, path.join(tempWorkspace, 'hooks', 'force-recall', 'handler.ts')],
      [path.join(repoRoot, 'docs', 'RULEBOOK.md'), path.join(tempWorkspace, 'docs', 'RULEBOOK.md')],
      [path.join(repoRoot, 'SOUL.md'), path.join(tempWorkspace, 'SOUL.md')],
    ];

    for (const [src, dest] of copies) {
      await fs.copyFile(src, dest);
    }
  } catch (err) {
    // Do not leak the partially populated directory; surface the original error.
    await fs.rm(tempWorkspace, { recursive: true, force: true });
    throw err;
  }

  return tempWorkspace;
}
async function withPatchedWrapper(tempContent, callback) { async function withPatchedWrapper(tempContent, callback) {
const originalWrapper = await fs.readFile(wrapperPath, 'utf8'); const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
await fs.writeFile(wrapperPath, tempContent, 'utf8'); await fs.writeFile(wrapperPath, tempContent, 'utf8');
@@ -51,6 +76,29 @@ async function withPatchedWrapper(tempContent, callback) {
} }
} }
/**
 * Run `callback` against an isolated temp workspace whose governor wrapper
 * script has been replaced by the script `buildWrapperScript(wrapperResult)`
 * returns, then delete the workspace.
 *
 * When `wrapperResult.externalizedCheckpointPath` is a non-blank string, a
 * placeholder checkpoint JSON file is written at that path (relative to the
 * workspace) so the path the wrapper reports points at a readable artifact.
 *
 * All setup happens inside the try/finally so that a failure while writing
 * the wrapper script or the checkpoint still removes the temp workspace.
 *
 * @param {object} wrapperResult - Result object the patched wrapper should emit.
 * @param {(workspaceDir: string) => any} callback - Invoked with the temp
 *   workspace path; its (awaited) result is returned.
 * @returns {Promise<any>} Whatever `callback` resolves to.
 * @throws Rethrows errors from workspace setup or from `callback`.
 */
async function withPatchedWrapperWorkspace(wrapperResult, callback) {
  const tempWorkspace = await prepareTempWorkspace();
  try {
    const wrapperScriptPath = path.join(tempWorkspace, 'scripts', 'long_task_governor_wrapper.mjs');
    await fs.writeFile(wrapperScriptPath, buildWrapperScript(wrapperResult), 'utf8');

    const relativeCheckpoint = wrapperResult.externalizedCheckpointPath;
    if (typeof relativeCheckpoint === 'string' && relativeCheckpoint.trim()) {
      const checkpointPath = path.join(tempWorkspace, relativeCheckpoint);
      const checkpoint = {
        kind: 'long_task_checkpoint',
        currentStep: 'patched-wrapper-test',
        nextStep: 'patched-wrapper-test-next',
        verificationResult: 'checkpoint artifact readable in temp workspace',
      };
      await fs.mkdir(path.dirname(checkpointPath), { recursive: true });
      await fs.writeFile(checkpointPath, `${JSON.stringify(checkpoint, null, 2)}\n`, 'utf8');
    }

    return await callback(tempWorkspace);
  } finally {
    await fs.rm(tempWorkspace, { recursive: true, force: true });
  }
}
function buildWrapperScript(wrapperResult) { function buildWrapperScript(wrapperResult) {
return `#!/usr/bin/env node\nprocess.stdout.write(JSON.stringify(${JSON.stringify(wrapperResult)}, null, 0) + "\\n");\n`; return `#!/usr/bin/env node\nprocess.stdout.write(JSON.stringify(${JSON.stringify(wrapperResult)}, null, 0) + "\\n");\n`;
} }
@@ -70,7 +118,39 @@ async function main() {
'Summarize the current dry-run planner state for technical inspection only.', 'Summarize the current dry-run planner state for technical inspection only.',
].join(' '); ].join(' ');
const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.'); const checkpointWorkspace = await prepareTempWorkspace();
let realWrapperInjected;
try {
realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.', checkpointWorkspace);
const wrapperInputPath = path.join(checkpointWorkspace, 'wrapper-input.json');
await fs.writeFile(wrapperInputPath, JSON.stringify({
requestText: 'Dispatch a subagent to inspect logs and wait for the result.',
hasFilesOrSystems: false,
needsWaiting: false,
needsSubagent: false,
needsOwnerDecision: false,
canReplyNow: false,
taskName: 'Hook preflight classification',
currentStep: 'Classifying request at preprocessed hook',
nextStep: 'Carry governor recommendation into prompt context',
nextReportCondition: 'At next meaningful milestone',
waitingOn: 'none',
blocker: 'none',
checkpointTrigger: '',
externalizedTrigger: '',
triggerKind: '',
}), 'utf8');
const wrapperRaw = await fs.readFile(path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), 'utf8');
assert.ok(wrapperRaw.length > 0, 'temp workspace should contain wrapper script');
const { stdout: wrapperStdout } = await execFileAsync('node', [path.join(checkpointWorkspace, 'scripts', 'long_task_governor_wrapper.mjs'), '--compact', '--input', wrapperInputPath], { cwd: checkpointWorkspace, encoding: 'utf8' });
const wrapperOutput = JSON.parse(wrapperStdout);
const checkpointPath = path.join(checkpointWorkspace, wrapperOutput.externalizedCheckpointPath);
const checkpointBody = await fs.readFile(checkpointPath, 'utf8');
assert.ok(checkpointBody.trim().length > 0, 'real wrapper integration should emit readable checkpoint artifact');
assert.doesNotMatch(checkpointBody, /Hook preflight classification/, 'real wrapper artifact should not fall back to taskRecord.task_name');
} finally {
await fs.rm(checkpointWorkspace, { recursive: true, force: true });
}
assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task'); assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence'); assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance'); assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
@@ -203,7 +283,7 @@ async function main() {
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement'); assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement'); assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
const passInjected = await withPatchedWrapper(buildWrapperScript({ const passInjected = await withPatchedWrapperWorkspace({
classification: 'long_task', classification: 'long_task',
silentCandidate: true, silentCandidate: true,
needsCheckpoint: true, needsCheckpoint: true,
@@ -220,7 +300,7 @@ async function main() {
progressEvidence: { sessionKey: 'task-123' }, progressEvidence: { sessionKey: 'task-123' },
externalizedCheckpointPath: 'checkpoints/task-123.json', externalizedCheckpointPath: 'checkpoints/task-123.json',
handoff: { mode: 'direct_reply' }, handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, requestText)); }, async (workspaceDir) => runScenario(forceRecall, requestText, workspaceDir));
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence'); assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block'); assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result'); assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');

View File

@@ -1,5 +1,7 @@
#!/usr/bin/env node #!/usr/bin/env node
import assert from 'node:assert/strict'; import assert from 'node:assert/strict';
import fs from 'node:fs';
import os from 'node:os';
import { execFileSync, spawnSync } from 'node:child_process'; import { execFileSync, spawnSync } from 'node:child_process';
import path from 'node:path'; import path from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
@@ -124,6 +126,36 @@ function assertErrorCase(name, args, expectedStderr, input) {
function main() { function main() {
const results = fixtures.map(runFixture); const results = fixtures.map(runFixture);
const realismWorkspace = fs.mkdtempSync(path.join(os.tmpdir(), 'wrapper-realism-'));
try {
const realismInput = path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json');
const stdout = execFileSync(process.execPath, [wrapperPath, '--compact', '--input', realismInput], {
cwd: realismWorkspace,
encoding: 'utf8',
});
const output = JSON.parse(stdout);
assert.equal(typeof output.externalizedCheckpointPath, 'string', 'realism: missing externalizedCheckpointPath');
assert.ok(output.externalizedCheckpointPath.length > 0, 'realism: empty externalizedCheckpointPath');
const artifactPath = path.join(realismWorkspace, output.externalizedCheckpointPath);
assert.ok(fs.existsSync(artifactPath), `realism: checkpoint artifact missing at ${artifactPath}`);
const artifactBody = fs.readFileSync(artifactPath, 'utf8');
assert.ok(artifactBody.trim().length > 0, 'realism: checkpoint artifact should be readable and non-empty');
assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'realism: progressEvidence must not include task_name fallback');
assert.equal(artifactBody.includes('Wait for delegated log survey'), false, 'realism: checkpoint artifact must not fall back to taskRecord.task_name');
results.push({
name: 'real checkpoint artifact',
output: {
classification: output.classification,
silentCandidate: output.silentCandidate,
silentLaunchOk: output.silentLaunchOk,
requiredNextAction: output.requiredNextAction,
handoff: output.handoff,
},
});
} finally {
fs.rmSync(realismWorkspace, { recursive: true, force: true });
}
assertErrorCase('invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n'); assertErrorCase('invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n');
assertErrorCase('missing input value', ['--input'], 'CLI_ERROR: --input requires a value'); assertErrorCase('missing input value', ['--input'], 'CLI_ERROR: --input requires a value');
assertErrorCase('unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus'); assertErrorCase('unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus');