fix continuity clean-room install verification
This commit is contained in:
179
scripts/test_long_task_governor_wrapper.mjs
Normal file
179
scripts/test_long_task_governor_wrapper.mjs
Normal file
@@ -0,0 +1,179 @@
|
||||
#!/usr/bin/env node
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import os from 'node:os';
|
||||
import { execFileSync, spawnSync } from 'node:child_process';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules do not provide __filename / __dirname, so rebuild them from
// import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The parent of this script's directory is used as the repository root.
const repoRoot = path.resolve(__dirname, '..');

// Script under test; every case below launches it as a child Node process.
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
|
||||
|
||||
/**
 * Wrapper scenarios consumed by runFixture().
 *
 * Each entry has:
 *   - name:   label used in failure messages and in the final summary;
 *   - file:   (optional) JSON input file handed to the wrapper via --input;
 *   - input:  (optional) inline object piped to the wrapper's stdin when no
 *             file is given;
 *   - assert: callback receiving the parsed wrapper output for
 *             scenario-specific checks.
 */
const fixtures = [
  {
    name: 'example',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_example.json'),
    assert(output) {
      assert.equal(output.classification, 'long_task');
    },
  },
  {
    // The only fixture without a file: its input goes through stdin.
    name: 'borderline wrapper inference',
    input: {
      requestText: 'Inspect the current hook and compare it to the wrapper outputs before replying.',
      canReplyNow: false,
    },
    assert(output) {
      assert.equal(output.classification, 'long_task');
      assert.equal(output.needsCheckpoint, true);
    },
  },
  {
    name: 'invalid silent',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_invalid_silent_example.json'),
    assert(output) {
      assert.equal(output.silentLaunchOk, false);
    },
  },
  {
    name: 'general chat',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_general_chat_example.json'),
    assert(output) {
      assert.equal(output.classification, 'general_chat');
    },
  },
  {
    name: 'non-silent long task',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_non_silent_long_task_example.json'),
    assert(output) {
      assert.equal(output.classification, 'long_task');
      assert.equal(output.silentCandidate, false);
    },
  },
  {
    name: 'owner decision',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_silent_owner_decision_example.json'),
    assert(output) {
      assert.equal(output.handoff.mode, 'button_path');
    },
  },
  {
    name: 'subagent wait',
    file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json'),
    assert(output) {
      assert.equal(output.silentCandidate, true);

      // progressEvidence must be a real object carrying a non-empty sessionKey.
      assert.ok(output.progressEvidence && typeof output.progressEvidence === 'object', 'subagent wait: missing progressEvidence');
      assert.equal(typeof output.progressEvidence.sessionKey, 'string', 'subagent wait: missing progressEvidence.sessionKey');
      assert.ok(output.progressEvidence.sessionKey.length > 0, 'subagent wait: empty progressEvidence.sessionKey');

      assert.equal(typeof output.externalizedCheckpointPath, 'string', 'subagent wait: missing externalizedCheckpointPath');
      assert.ok(output.externalizedCheckpointPath.length > 0, 'subagent wait: empty externalizedCheckpointPath');

      // Guard against evidence being filled in from the task record's name.
      // NOTE(review): 'Wait for delegated log survey' is presumably the
      // task_name stored in the fixture file; confirm against that JSON.
      assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'subagent wait: progressEvidence must not backfill taskRecord.task_name');
      assert.equal(JSON.stringify(output.progressEvidence).includes('Wait for delegated log survey'), false, 'subagent wait: progressEvidence must not derive from taskRecord.task_name');
    },
  },
];
|
||||
|
||||
/**
 * Runs the wrapper script for a single fixture and validates its output.
 *
 * The wrapper is launched with the current Node binary, `--compact`, and
 * either `--input <fixture.file>` or, when the fixture has no file, with
 * `fixture.input` serialized as JSON on stdin.
 *
 * @param {{name: string, file?: string, input?: object, assert: Function}} fixture
 *   Scenario description; `assert` is called with the parsed output.
 * @returns {{name: string, output: object}} The fixture name and parsed output.
 * @throws {Error} If the wrapper exits non-zero (raised by execFileSync) or
 *   its stdout is not valid JSON.
 * @throws {assert.AssertionError} If the output is not an object, lacks a
 *   required field, or fails the fixture's own checks.
 */
function runFixture(fixture) {
  const args = fixture.file
    ? [wrapperPath, '--compact', '--input', fixture.file]
    : [wrapperPath, '--compact'];
  const options = {
    cwd: repoRoot,
    encoding: 'utf8',
  };

  if (fixture.input) {
    options.input = `${JSON.stringify(fixture.input)}\n`;
  }

  const stdout = execFileSync(process.execPath, args, options);

  let output;
  try {
    output = JSON.parse(stdout);
  } catch (error) {
    // Keep the parse error attached so its stack is not lost.
    throw new Error(
      `Fixture "${fixture.name}" did not produce valid JSON: ${error.message}\nOutput: ${stdout}`,
      { cause: error },
    );
  }

  // `null` and primitives are valid JSON; reject them here so the field
  // checks below fail with a clear message instead of a TypeError.
  assert.ok(
    output !== null && typeof output === 'object',
    `${fixture.name}: output must be a JSON object`,
  );

  // Fields every wrapper result must carry, whatever the scenario.
  const requiredFields = ['classification', 'silentCandidate', 'silentLaunchOk', 'requiredNextAction'];
  for (const field of requiredFields) {
    assert.ok(output[field] !== undefined, `${fixture.name}: missing ${field}`);
  }
  assert.ok(output.handoff && output.handoff.mode !== undefined, `${fixture.name}: missing handoff.mode`);

  fixture.assert(output);

  return {
    name: fixture.name,
    output,
  };
}
|
||||
|
||||
/**
 * Runs the wrapper with the given CLI arguments and asserts that it fails
 * the expected way: non-zero exit, nothing on stdout, and stderr (trimmed)
 * equal to `expectedStderr`.
 *
 * @param {string} name Label used as a prefix in failure messages.
 * @param {string[]} args Arguments appended after the wrapper path.
 * @param {string} expectedStderr Exact stderr text expected after trimming.
 * @param {string} [input] Optional text piped to the wrapper's stdin.
 * @throws {Error} If the child process could not be launched at all.
 * @throws {assert.AssertionError} If the exit status, stdout or stderr differ
 *   from the expectation.
 */
function assertErrorCase(name, args, expectedStderr, input) {
  const result = spawnSync(process.execPath, [wrapperPath, ...args], {
    cwd: repoRoot,
    encoding: 'utf8',
    input,
  });

  // When the spawn itself fails, stdout/stderr are null; report that as a
  // launch failure rather than as a confusing stdout mismatch.
  if (result.error) {
    throw new Error(`${name}: failed to launch wrapper: ${result.error.message}`, { cause: result.error });
  }

  assert.notEqual(result.status, 0, `${name}: expected non-zero exit`);
  assert.equal(result.stdout, '', `${name}: expected empty stdout`);
  assert.equal(result.stderr.trim(), expectedStderr, `${name}: unexpected stderr`);
}
|
||||
|
||||
/**
 * Runs the wrapper on the "subagent wait" fixture from a fresh temporary
 * working directory and checks that the checkpoint file it reports exists
 * there (path resolved relative to that directory), is non-empty, and does
 * not contain the task-name fallback text.
 *
 * The temporary directory is removed whether or not the checks pass.
 *
 * @returns {{name: string, output: object}} Summary entry for this check.
 */
function runRealismCheck() {
  const realismWorkspace = fs.mkdtempSync(path.join(os.tmpdir(), 'wrapper-realism-'));
  try {
    const realismInput = path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json');
    const stdout = execFileSync(process.execPath, [wrapperPath, '--compact', '--input', realismInput], {
      cwd: realismWorkspace,
      encoding: 'utf8',
    });
    const output = JSON.parse(stdout);

    assert.equal(typeof output.externalizedCheckpointPath, 'string', 'realism: missing externalizedCheckpointPath');
    assert.ok(output.externalizedCheckpointPath.length > 0, 'realism: empty externalizedCheckpointPath');

    const artifactPath = path.join(realismWorkspace, output.externalizedCheckpointPath);
    assert.ok(fs.existsSync(artifactPath), `realism: checkpoint artifact missing at ${artifactPath}`);

    const artifactBody = fs.readFileSync(artifactPath, 'utf8');
    assert.ok(artifactBody.trim().length > 0, 'realism: checkpoint artifact should be readable and non-empty');
    assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'realism: progressEvidence must not include task_name fallback');
    assert.equal(artifactBody.includes('Wait for delegated log survey'), false, 'realism: checkpoint artifact must not fall back to taskRecord.task_name');

    // Only the fields the summary prints are kept.
    return {
      name: 'real checkpoint artifact',
      output: {
        classification: output.classification,
        silentCandidate: output.silentCandidate,
        silentLaunchOk: output.silentLaunchOk,
        requiredNextAction: output.requiredNextAction,
        handoff: output.handoff,
      },
    };
  } finally {
    fs.rmSync(realismWorkspace, { recursive: true, force: true });
  }
}

/**
 * Test entry point: runs every fixture, the checkpoint-artifact check and
 * the CLI error cases, then prints a JSON summary to stdout.
 *
 * Any failed assertion propagates as an exception; nothing is caught here.
 */
function main() {
  const results = fixtures.map(runFixture);
  results.push(runRealismCheck());

  // [name, args, expected stderr, optional stdin]; the summary count below
  // is derived from this list so the two cannot drift apart.
  const errorCases = [
    ['invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n'],
    ['missing input value', ['--input'], 'CLI_ERROR: --input requires a value'],
    ['unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus'],
  ];
  for (const errorCase of errorCases) {
    assertErrorCase(...errorCase);
  }

  const summary = {
    passed: results.length,
    fixtures: results.map(({ name, output }) => ({
      name,
      classification: output.classification,
      silentCandidate: output.silentCandidate,
      silentLaunchOk: output.silentLaunchOk,
      requiredNextAction: output.requiredNextAction,
      handoffMode: output.handoff.mode,
    })),
    errorCases: errorCases.length,
  };

  process.stdout.write(`${JSON.stringify(summary, null, 2)}\n`);
}
|
||||
|
||||
// Run the suite on load. main() is synchronous and catches nothing, so a failed assertion surfaces as an uncaught exception.
main();
|
||||
Reference in New Issue
Block a user