#!/usr/bin/env node
/**
 * Scenario runner for the sibling `proactive_report_gate_lock.mjs` script.
 *
 * Each scenario's `input` is sent to the gate script as JSON on stdin; the JSON
 * the script prints on stdout is then checked against the scenario's `expected`
 * constraints. A JSON report of all scenarios is written to stdout and the
 * process exit code is 1 if any scenario failed.
 */
import assert from 'node:assert/strict';
import { spawnSync } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The gate script is resolved next to this file, so the runner works from any cwd.
const gateScript = path.join(__dirname, 'proactive_report_gate_lock.mjs');

/**
 * Scenario table. `input` is passed verbatim to the gate script; `expected`
 * holds optional constraints interpreted by `assertScenario`.
 */
const scenarios = [
  {
    name: 'missing first report trigger',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      nextReportCondition: 'after subagent returns',
      fallbackState: 'blocked',
      reportMode: 'watchdog',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'missing first proactive report trigger',
      requiredEvidenceKey: 'firstReportTrigger',
      allowedResponseModesIncludes: 'non_silent_follow_up',
    },
  },
  {
    name: 'missing next report condition',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
      fallbackState: 'paused',
      reportMode: 'watchdog',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'missing next proactive report condition',
      requiredEvidenceKey: 'nextReportCondition',
      allowedResponseModesIncludes: 'non_silent_follow_up',
    },
  },
  {
    name: 'missing fallback state',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      firstReportTrigger: 'when delegated scan returns',
      nextReportCondition: 'report again after verifier output or blocker-state change',
      reportMode: 'watchdog',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'missing fallback state for stalled reporting',
      requiredEvidenceKey: 'fallbackState',
      allowedResponseModesIncludes: 'non_silent_follow_up',
    },
  },
  {
    name: 'checkpoint-only spoof is insufficient',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      externalizedCheckpointPath: 'checkpoints/task-123.json',
      checkpointTrigger: 'when subagent returns',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'checkpoint path alone does not satisfy proactive report binding',
      requiredEvidenceKey: 'firstReportTrigger',
      allowedResponseModesIncludes: 'non_silent_follow_up',
    },
  },
  {
    name: 'valid proactive report binding',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
      nextReportCondition: 'report again only after new verifier output or blocker-state change',
      fallbackState: 'blocked',
      reportMode: 'watchdog',
      ownerVisibleIfStalled: true,
    },
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'proactive report binding is complete for silent progression',
      allowedResponseModesIncludes: 'silent_continuation',
      requiredEvidenceLength: 0,
      reportBindingStatus: 'bound',
    },
  },
  {
    name: 'non-silent long-task is not gated',
    input: {
      classification: 'long_task',
      silentCandidate: false,
    },
    expected: {
      gateRequired: false,
      gateStatus: 'not_applicable',
      reasonIncludes: 'not a silent progression candidate',
      allowedResponseModesIncludes: 'direct_reply',
      requiredEvidenceLength: 0,
    },
  },
  {
    name: 'owner decision + button-path handoff',
    input: {
      classification: 'long_task',
      silentCandidate: true,
      firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
      nextReportCondition: 'report again only after new verifier output or blocker-state change',
      fallbackState: 'waiting_user',
      reportMode: 'button_path',
      ownerVisibleIfStalled: true,
      needsOwnerDecision: true,
      handoffMode: 'button_path',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'owner decision flow preserves button-path handoff',
      allowedResponseModesIncludes: 'button_path',
      disallowedResponseMode: 'plain_text_closure',
      requiredEvidenceLength: 0,
      reportBindingStatus: 'bound',
    },
  },
];

/**
 * Runs the gate script in a child Node process with `--compact`, feeding
 * `input` as JSON on stdin, and returns the parsed JSON from its stdout.
 *
 * @param {object} input - Scenario payload serialized to the child's stdin.
 * @returns {*} The value parsed from the child's stdout.
 * @throws {Error} If the child cannot be run, exits with a non-zero status
 *   (or is killed by a signal), or prints output that is not valid JSON.
 */
function runGate(input) {
  const result = spawnSync(process.execPath, [gateScript, '--compact'], {
    input: JSON.stringify(input),
    encoding: 'utf8',
  });

  // spawnSync reports spawn-level failures via `error`; in that case `status`
  // is typically null and the output streams are empty, so surface the real cause.
  if (result.error) {
    throw new Error(`gate script could not be run: ${result.error.message}`, {
      cause: result.error,
    });
  }

  if (result.status !== 0) {
    // A child killed by a signal has status === null; include the signal so
    // the message is actionable.
    const signalNote = result.signal ? ` signal=${result.signal}` : '';
    throw new Error(
      `gate script failed with status=${result.status}${signalNote}: ${result.stderr || result.stdout}`,
    );
  }

  try {
    return JSON.parse(result.stdout);
  } catch (error) {
    throw new Error(
      `gate script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`,
      { cause: error },
    );
  }
}

/**
 * Asserts that the gate output has the core fields every scenario relies on.
 *
 * @param {*} output - Parsed gate output.
 * @throws {assert.AssertionError} If the output is not an object or a core
 *   field is missing or has the wrong type.
 */
function requireCoreFields(output) {
  // Guard first: valid JSON such as `null` or `42` would otherwise fail below
  // with an unhelpful TypeError instead of an assertion message.
  assert.ok(output !== null && typeof output === 'object', 'gate output should be a JSON object');
  assert.equal(typeof output.gateRequired, 'boolean', 'gateRequired should be boolean');
  assert.equal(typeof output.gateStatus, 'string', 'gateStatus should be string');
  assert.ok(Array.isArray(output.reasons), 'reasons should be an array');
  assert.ok(Array.isArray(output.requiredEvidence), 'requiredEvidence should be an array');
  assert.ok(Array.isArray(output.allowedResponseModes), 'allowedResponseModes should be an array');
}

/**
 * Checks the gate output against a scenario's expectations. Every key in
 * `expected` is optional; only the constraints that are present are enforced.
 *
 * @param {object} output - Parsed gate output (already validated by `requireCoreFields`).
 * @param {object} expected - Constraints from the scenario table.
 * @throws {assert.AssertionError} On the first constraint that does not hold.
 */
function assertScenario(output, expected) {
  if (typeof expected.gateRequired === 'boolean') {
    assert.equal(output.gateRequired, expected.gateRequired, 'gateRequired mismatch');
  }

  if (typeof expected.gateStatus === 'string') {
    assert.equal(output.gateStatus, expected.gateStatus, 'gateStatus mismatch');
  }

  if (expected.reasonIncludes) {
    assert.ok(
      // Non-string entries simply do not match, rather than raising a TypeError.
      output.reasons.some(
        (reason) => typeof reason === 'string' && reason.includes(expected.reasonIncludes),
      ),
      `expected reasons to include: ${expected.reasonIncludes}`,
    );
  }

  if (expected.allowedResponseModesIncludes) {
    assert.ok(
      output.allowedResponseModes.includes(expected.allowedResponseModesIncludes),
      `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludes}`,
    );
  }

  if (expected.disallowedResponseMode) {
    assert.ok(
      !output.allowedResponseModes.includes(expected.disallowedResponseMode),
      `expected allowedResponseModes to exclude: ${expected.disallowedResponseMode}`,
    );
  }

  if (typeof expected.requiredEvidenceLength === 'number') {
    assert.equal(
      output.requiredEvidence.length,
      expected.requiredEvidenceLength,
      'requiredEvidence length mismatch',
    );
  }

  if (expected.requiredEvidenceKey) {
    assert.ok(
      output.requiredEvidence.some((entry) => entry?.evidenceKey === expected.requiredEvidenceKey),
      `expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`,
    );
  }

  if (expected.reportBindingStatus) {
    assert.equal(output.reportBindingStatus, expected.reportBindingStatus, 'reportBindingStatus mismatch');
  }
}

// Run every scenario; a failure is recorded and the loop continues so the
// report always covers the full table.
const results = [];
let failed = false;

for (const scenario of scenarios) {
  try {
    const output = runGate(scenario.input);
    requireCoreFields(output);
    assertScenario(output, scenario.expected);
    results.push({
      scenario: scenario.name,
      ok: true,
      gateRequired: output.gateRequired,
      gateStatus: output.gateStatus,
      reasons: output.reasons,
      // Tolerate null entries here, matching the guard in assertScenario.
      requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry?.evidenceKey),
      allowedResponseModes: output.allowedResponseModes,
      reportBindingStatus: output.reportBindingStatus,
      assertion: 'pass',
    });
  } catch (error) {
    failed = true;
    results.push({
      scenario: scenario.name,
      ok: false,
      assertion: 'fail',
      error: error instanceof Error ? error.message : String(error),
    });
  }
}

const summary = {
  total: results.length,
  passed: results.filter((entry) => entry.ok).length,
  failed: results.filter((entry) => !entry.ok).length,
};

process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`);

// Set the exit code instead of calling process.exit(): a forced exit can drop
// stdout data that has not been flushed yet (e.g. when stdout is a pipe).
if (failed) {
  process.exitCode = 1;
}