#!/usr/bin/env node
// Scenario-driven check of the sibling `long_task_gate_lock.mjs` script.
//
// Each scenario's `input` is sent to the gate script as JSON on stdin; the JSON
// the script prints on stdout is compared against the scenario's `expected`
// description. A JSON report of every scenario is written to stdout, and the
// process exit code is 1 when at least one scenario failed.
import assert from 'node:assert/strict';
import { spawnSync } from 'node:child_process';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// The gate script is resolved relative to this file, not the working directory.
const gateScript = path.join(__dirname, 'long_task_gate_lock.mjs');

// Supported `expected` keys (all optional except gateRequired/gateStatus):
//   reasonIncludes                    substring that some entry of `reasons` must contain
//   allowedResponseModesIncludes      value that `allowedResponseModes` must contain
//   allowedResponseModesIncludesAlso  second value that `allowedResponseModes` must contain
//   requiredEvidenceLength            exact length of `requiredEvidence`
//   requiredEvidenceKey               `evidenceKey` that some `requiredEvidence` entry must carry
const scenarios = [
  {
    name: 'ordinary chat -> gateStatus=not_applicable',
    input: {
      classification: 'ordinary_chat',
      message: 'just answer directly',
    },
    expected: {
      gateRequired: false,
      gateStatus: 'not_applicable',
      reasonIncludes: 'classification is not long_task',
      allowedResponseModesIncludes: 'direct_reply',
      requiredEvidenceLength: 0,
    },
  },
  {
    name: 'long-task missing externalized checkpoint -> gateStatus=fail',
    input: {
      classification: 'long_task',
      silentContinuation: true,
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'silent long-task cannot continue without externalized checkpoint path',
      allowedResponseModesIncludes: 'non_silent_follow_up',
      requiredEvidenceKey: 'externalizedCheckpoint',
    },
  },
  {
    name: 'long-task with explicit externalized checkpoint + concrete next action -> gateStatus=pass',
    input: {
      classification: 'long_task',
      silentContinuation: true,
      claimedExecution: true,
      externalizedCheckpointPath: 'checkpoints/task-42.md',
      concreteNextAction: 'Run the queued verifier and report back with output.',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered',
      allowedResponseModesIncludes: 'silent_continuation',
      allowedResponseModesIncludesAlso: 'direct_reply',
      requiredEvidenceLength: 0,
    },
  },
  {
    name: 'owner decision without button-path -> gateStatus=fail',
    input: {
      classification: 'long_task',
      needsOwnerDecision: true,
      replyClosureMode: 'plain_text',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'fail',
      reasonIncludes: 'owner decision flow must end in button-path, not plain text',
      allowedResponseModesIncludes: 'button_path',
      requiredEvidenceKey: 'buttonPathMode',
    },
  },
  {
    name: 'owner decision with button-path -> gateStatus=pass',
    input: {
      classification: 'long_task',
      needsOwnerDecision: true,
      replyClosureMode: 'button_path',
    },
    expected: {
      gateRequired: true,
      gateStatus: 'pass',
      reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered',
      allowedResponseModesIncludes: 'button_path',
      requiredEvidenceLength: 0,
    },
  },
];

/**
 * Runs the gate script in a child Node process and returns its parsed output.
 *
 * The script is started with the `--compact` flag, receives `input` serialized
 * as JSON on stdin, and is expected to print a single JSON document on stdout.
 *
 * @param {object} input - Scenario input forwarded to the gate script.
 * @returns {*} The value parsed from the gate script's stdout.
 * @throws {Error} If the child cannot be run, exits non-zero, is terminated by
 *   a signal, or prints something that is not valid JSON.
 */
function runGate(input) {
  const result = spawnSync(process.execPath, [gateScript, '--compact'], {
    input: JSON.stringify(input),
    encoding: 'utf8',
  });

  // `error` is set when the child could not be spawned or was aborted by
  // spawnSync itself; in that case `status` is not a meaningful exit code.
  if (result.error) {
    throw new Error(`gate script could not be run: ${result.error.message}`, {
      cause: result.error,
    });
  }

  if (result.status !== 0) {
    // A signal-terminated child has status=null; report the signal instead.
    const outcome = result.signal ? `signal=${result.signal}` : `status=${result.status}`;
    throw new Error(`gate script failed with ${outcome}: ${result.stderr || result.stdout}`);
  }

  try {
    return JSON.parse(result.stdout);
  } catch (error) {
    throw new Error(
      `gate script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`,
      { cause: error },
    );
  }
}

/**
 * Asserts that the gate output has the fields every scenario relies on.
 *
 * @param {*} output - Parsed gate script output.
 * @throws {assert.AssertionError} If the output is not an object or a core
 *   field is missing or has the wrong type.
 */
function requireCoreFields(output) {
  // Checked first so a `null` or primitive result fails with a clear message
  // instead of a TypeError from the property reads below.
  assert.ok(output !== null && typeof output === 'object', 'gate output should be an object');
  assert.equal(typeof output.gateRequired, 'boolean', 'gateRequired should be boolean');
  assert.equal(typeof output.gateStatus, 'string', 'gateStatus should be string');
  assert.ok(Array.isArray(output.reasons), 'reasons should be an array');
  assert.ok(Array.isArray(output.requiredEvidence), 'requiredEvidence should be an array');
  assert.ok(Array.isArray(output.allowedResponseModes), 'allowedResponseModes should be an array');
}

/**
 * Asserts that the gate output satisfies one scenario's expectations.
 * Optional expectation keys that are absent (or falsy) are not checked.
 *
 * @param {object} output - Gate output that already passed requireCoreFields.
 * @param {object} expected - The scenario's `expected` description.
 * @throws {assert.AssertionError} On the first expectation that does not hold.
 */
function assertScenario(output, expected) {
  assert.equal(output.gateRequired, expected.gateRequired, 'gateRequired mismatch');
  assert.equal(output.gateStatus, expected.gateStatus, 'gateStatus mismatch');

  if (expected.reasonIncludes) {
    assert.ok(
      // Non-string reasons simply do not match rather than raising a TypeError.
      output.reasons.some(
        (reason) => typeof reason === 'string' && reason.includes(expected.reasonIncludes),
      ),
      `expected reasons to include: ${expected.reasonIncludes}`,
    );
  }

  const expectedModes = [
    expected.allowedResponseModesIncludes,
    expected.allowedResponseModesIncludesAlso,
  ];
  for (const mode of expectedModes) {
    if (mode) {
      assert.ok(
        output.allowedResponseModes.includes(mode),
        `expected allowedResponseModes to include: ${mode}`,
      );
    }
  }

  if (typeof expected.requiredEvidenceLength === 'number') {
    assert.equal(
      output.requiredEvidence.length,
      expected.requiredEvidenceLength,
      'requiredEvidence length mismatch',
    );
  }

  if (expected.requiredEvidenceKey) {
    assert.ok(
      output.requiredEvidence.some((entry) => entry?.evidenceKey === expected.requiredEvidenceKey),
      `expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`,
    );
  }
}

// Run every scenario; a failure is recorded and the remaining scenarios still run.
const results = [];
for (const scenario of scenarios) {
  try {
    const output = runGate(scenario.input);
    requireCoreFields(output);
    assertScenario(output, scenario.expected);
    results.push({
      scenario: scenario.name,
      ok: true,
      gateRequired: output.gateRequired,
      gateStatus: output.gateStatus,
      reasons: output.reasons,
      // `?.` mirrors the falsy-entry tolerance of assertScenario, so a scenario
      // that passed its assertions cannot fail while the report is being built.
      requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry?.evidenceKey),
      allowedResponseModes: output.allowedResponseModes,
      assertion: 'pass',
    });
  } catch (error) {
    results.push({
      scenario: scenario.name,
      ok: false,
      assertion: 'fail',
      error: error instanceof Error ? error.message : String(error),
    });
  }
}

const passedCount = results.filter((entry) => entry.ok).length;
const summary = {
  total: results.length,
  passed: passedCount,
  failed: results.length - passedCount,
};

process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`);

// Set the exit code instead of calling process.exit(): exit() can end the
// process before a pending stdout write has been flushed, truncating the report.
if (summary.failed > 0) {
  process.exitCode = 1;
}