Files
reporting-governance-plugin/scripts/test_proactive_report_gate_lock.mjs

256 lines
7.9 KiB
JavaScript

#!/usr/bin/env node
import assert from 'node:assert/strict';
import { spawnSync } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// ES modules have no built-in __filename/__dirname, so derive them from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The gate implementation under test lives next to this test script.
const gateScript = path.resolve(__dirname, 'proactive_report_gate_lock.mjs');
// Report-trigger / report-condition wording reused by several scenarios.
const FIRST_TRIGGER_WITH_TIMEOUT = 'when delegated scan returns or at 10 minutes, whichever comes first';
const NEXT_CONDITION_NEW_OUTPUT_ONLY = 'report again only after new verifier output or blocker-state change';

/**
 * Builds a gate input for a long task flagged as a silent-progression candidate.
 * The two shared fields come first so the serialized key order stays stable.
 */
function silentLongTaskInput(extraFields) {
  return { classification: 'long_task', silentCandidate: true, ...extraFields };
}

/** Builds the expectation shared by every scenario in which the gate must fail. */
function expectGateFailure(reasonIncludes, requiredEvidenceKey) {
  return {
    gateRequired: true,
    gateStatus: 'fail',
    reasonIncludes,
    requiredEvidenceKey,
    allowedResponseModesIncludes: 'non_silent_follow_up',
  };
}

/**
 * Table of gate scenarios. Each entry has a display `name`, the `input` object that is
 * sent to the gate script as JSON, and the `expected` fields checked by assertScenario.
 */
const scenarios = [
  {
    name: 'missing first report trigger',
    input: silentLongTaskInput({
      nextReportCondition: 'after subagent returns',
      fallbackState: 'blocked',
      reportMode: 'watchdog',
    }),
    expected: expectGateFailure('missing first proactive report trigger', 'firstReportTrigger'),
  },
  {
    name: 'missing next report condition',
    input: silentLongTaskInput({
      firstReportTrigger: FIRST_TRIGGER_WITH_TIMEOUT,
      fallbackState: 'paused',
      reportMode: 'watchdog',
    }),
    expected: expectGateFailure('missing next proactive report condition', 'nextReportCondition'),
  },
  {
    name: 'missing fallback state',
    input: silentLongTaskInput({
      firstReportTrigger: 'when delegated scan returns',
      nextReportCondition: 'report again after verifier output or blocker-state change',
      reportMode: 'watchdog',
    }),
    expected: expectGateFailure('missing fallback state for stalled reporting', 'fallbackState'),
  },
  {
    // A checkpoint path plus trigger, with no report binding fields at all.
    name: 'checkpoint-only spoof is insufficient',
    input: silentLongTaskInput({
      externalizedCheckpointPath: 'checkpoints/task-123.json',
      checkpointTrigger: 'when subagent returns',
    }),
    expected: expectGateFailure(
      'checkpoint path alone does not satisfy proactive report binding',
      'firstReportTrigger',
    ),
  },
  {
    name: 'valid proactive report binding',
    input: silentLongTaskInput({
      firstReportTrigger: FIRST_TRIGGER_WITH_TIMEOUT,
      nextReportCondition: NEXT_CONDITION_NEW_OUTPUT_ONLY,
      fallbackState: 'blocked',
      reportMode: 'watchdog',
      ownerVisibleIfStalled: true,
    }),
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'proactive report binding is complete for silent progression',
      allowedResponseModesIncludes: 'silent_continuation',
      requiredEvidenceLength: 0,
      reportBindingStatus: 'bound',
    },
  },
  {
    // The only scenario that is not a silent candidate, so it is written out in full.
    name: 'non-silent long-task is not gated',
    input: {
      classification: 'long_task',
      silentCandidate: false,
    },
    expected: {
      gateRequired: false,
      gateStatus: 'not_applicable',
      reasonIncludes: 'not a silent progression candidate',
      allowedResponseModesIncludes: 'direct_reply',
      requiredEvidenceLength: 0,
    },
  },
  {
    name: 'owner decision + button-path handoff',
    input: silentLongTaskInput({
      firstReportTrigger: FIRST_TRIGGER_WITH_TIMEOUT,
      nextReportCondition: NEXT_CONDITION_NEW_OUTPUT_ONLY,
      fallbackState: 'waiting_user',
      reportMode: 'button_path',
      ownerVisibleIfStalled: true,
      needsOwnerDecision: true,
      handoffMode: 'button_path',
    }),
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'owner decision flow preserves button-path handoff',
      allowedResponseModesIncludes: 'button_path',
      disallowedResponseMode: 'plain_text_closure',
      requiredEvidenceLength: 0,
      reportBindingStatus: 'bound',
    },
  },
];
/**
 * Runs the gate script in a child Node process and returns its parsed JSON output.
 *
 * The child is started with the current Node executable and the `--compact` flag;
 * `input` is JSON-serialized and written to the child's stdin.
 *
 * @param {unknown} input - Scenario input; must be JSON-serializable.
 * @returns {unknown} The value parsed from the child's stdout.
 * @throws {Error} If the child cannot be run, exits non-zero, is terminated by a
 *   signal, or prints something that is not valid JSON.
 */
function runGate(input) {
  const result = spawnSync(process.execPath, [gateScript, '--compact'], {
    input: JSON.stringify(input),
    encoding: 'utf8',
  });

  // spawnSync reports launch/IO failures (e.g. ENOENT, ENOBUFS) through `error`
  // and leaves `status` null, so surface the real cause instead of "status=null".
  if (result.error) {
    throw new Error(`gate script could not be run: ${result.error.message}`, { cause: result.error });
  }

  if (result.status !== 0) {
    // A child killed by a signal has a null status; include the signal so the failure is diagnosable.
    const signalNote = result.signal ? ` signal=${result.signal}` : '';
    throw new Error(
      `gate script failed with status=${result.status}${signalNote}: ${result.stderr || result.stdout}`,
    );
  }

  try {
    return JSON.parse(result.stdout);
  } catch (error) {
    throw new Error(`gate script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`, {
      cause: error,
    });
  }
}
/**
 * Asserts that a gate output carries the fields every scenario relies on:
 * a boolean `gateRequired`, a string `gateStatus`, and the arrays `reasons`,
 * `requiredEvidence` and `allowedResponseModes`.
 *
 * @param {object} output - Parsed gate output.
 * @throws {AssertionError} On the first field with the wrong type.
 */
function requireCoreFields(output) {
  const primitiveFields = [
    ['gateRequired', 'boolean'],
    ['gateStatus', 'string'],
  ];
  for (const [field, expectedType] of primitiveFields) {
    assert.strictEqual(typeof output[field], expectedType, `${field} should be ${expectedType}`);
  }

  const arrayFields = ['reasons', 'requiredEvidence', 'allowedResponseModes'];
  for (const field of arrayFields) {
    assert.ok(Array.isArray(output[field]), `${field} should be an array`);
  }
}
/**
 * Compares a gate output against one scenario's expectations.
 * Only the expectation keys that are present are checked, in a fixed order,
 * and the first mismatch throws.
 *
 * @param {object} output - Parsed gate output (already validated by requireCoreFields).
 * @param {object} expected - Scenario expectations; every key is optional.
 * @throws {AssertionError} On the first expectation that does not hold.
 */
function assertScenario(output, expected) {
  if (typeof expected.gateRequired === 'boolean') {
    assert.strictEqual(output.gateRequired, expected.gateRequired, 'gateRequired mismatch');
  }

  if (typeof expected.gateStatus === 'string') {
    assert.strictEqual(output.gateStatus, expected.gateStatus, 'gateStatus mismatch');
  }

  if (expected.reasonIncludes) {
    // Substring match: any one reason containing the fragment is enough.
    const hasMatchingReason = output.reasons.some((text) => text.includes(expected.reasonIncludes));
    assert.ok(hasMatchingReason, `expected reasons to include: ${expected.reasonIncludes}`);
  }

  if (expected.allowedResponseModesIncludes) {
    const modePresent = output.allowedResponseModes.includes(expected.allowedResponseModesIncludes);
    assert.ok(modePresent, `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludes}`);
  }

  if (expected.disallowedResponseMode) {
    const modeLeaked = output.allowedResponseModes.includes(expected.disallowedResponseMode);
    assert.ok(!modeLeaked, `expected allowedResponseModes to exclude: ${expected.disallowedResponseMode}`);
  }

  if (typeof expected.requiredEvidenceLength === 'number') {
    const evidenceCount = output.requiredEvidence.length;
    assert.strictEqual(evidenceCount, expected.requiredEvidenceLength, 'requiredEvidence length mismatch');
  }

  if (expected.requiredEvidenceKey) {
    // Falsy entries are skipped rather than dereferenced.
    const carriesKey = (entry) => entry && entry.evidenceKey === expected.requiredEvidenceKey;
    assert.ok(
      output.requiredEvidence.some(carriesKey),
      `expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`,
    );
  }

  if (expected.reportBindingStatus) {
    assert.strictEqual(output.reportBindingStatus, expected.reportBindingStatus, 'reportBindingStatus mismatch');
  }
}
// Run every scenario and record one result entry per scenario. A failing scenario is
// recorded and the loop continues, so the report always covers the whole table.
const results = [];
for (const scenario of scenarios) {
  try {
    const output = runGate(scenario.input);
    requireCoreFields(output);
    assertScenario(output, scenario.expected);
    results.push({
      scenario: scenario.name,
      ok: true,
      gateRequired: output.gateRequired,
      gateStatus: output.gateStatus,
      reasons: output.reasons,
      // assertScenario tolerates null/falsy evidence entries, so the report must too;
      // otherwise building the report could turn a passing scenario into a failure.
      requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry?.evidenceKey),
      allowedResponseModes: output.allowedResponseModes,
      reportBindingStatus: output.reportBindingStatus,
      assertion: 'pass',
    });
  } catch (error) {
    results.push({
      scenario: scenario.name,
      ok: false,
      assertion: 'fail',
      error: error instanceof Error ? error.message : String(error),
    });
  }
}

// Aggregate counts for the JSON report.
const summary = {
  total: results.length,
  passed: results.filter((entry) => entry.ok).length,
  failed: results.filter((entry) => !entry.ok).length,
};

process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`);

// Set the exit code instead of calling process.exit(): exiting immediately can cut off
// the report above when stdout is a pipe and the write has not been flushed yet.
if (summary.failed > 0) {
  process.exitCode = 1;
}