diff --git a/scripts/test_long_task_gate_lock.mjs b/scripts/test_long_task_gate_lock.mjs new file mode 100644 index 0000000..bd02eb9 --- /dev/null +++ b/scripts/test_long_task_gate_lock.mjs @@ -0,0 +1,197 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { spawnSync } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const gateScript = path.join(__dirname, 'long_task_gate_lock.mjs'); + +const scenarios = [ + { + name: 'ordinary chat -> gateStatus=not_applicable', + input: { + classification: 'ordinary_chat', + message: 'just answer directly', + }, + expected: { + gateRequired: false, + gateStatus: 'not_applicable', + reasonIncludes: 'classification is not long_task', + allowedResponseModesIncludes: 'direct_reply', + requiredEvidenceLength: 0, + }, + }, + { + name: 'long-task missing externalized checkpoint -> gateStatus=fail', + input: { + classification: 'long_task', + silentContinuation: true, + }, + expected: { + gateRequired: true, + gateStatus: 'fail', + reasonIncludes: 'silent long-task cannot continue without externalized checkpoint path', + allowedResponseModesIncludes: 'non_silent_follow_up', + requiredEvidenceKey: 'externalizedCheckpoint', + }, + }, + { + name: 'long-task with explicit externalized checkpoint + concrete next action -> gateStatus=pass', + input: { + classification: 'long_task', + silentContinuation: true, + claimedExecution: true, + externalizedCheckpointPath: 'checkpoints/task-42.md', + concreteNextAction: 'Run the queued verifier and report back with output.', + }, + expected: { + gateRequired: true, + gateStatus: 'pass', + reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered', + allowedResponseModesIncludes: 'silent_continuation', + allowedResponseModesIncludesAlso: 'direct_reply', + requiredEvidenceLength: 0, + }, + }, + { + name: 'owner decision without button-path -> gateStatus=fail', + input: { + classification: 'long_task', + needsOwnerDecision: true, + replyClosureMode: 'plain_text', + }, + expected: { + gateRequired: true, + gateStatus: 'fail', + reasonIncludes: 'owner decision flow must end in button-path, not plain text', + allowedResponseModesIncludes: 'button_path', + requiredEvidenceKey: 'buttonPathMode', + }, + }, + { + name: 'owner decision with button-path -> gateStatus=pass', + input: { + classification: 'long_task', + needsOwnerDecision: true, + replyClosureMode: 'button_path', + }, + expected: { + gateRequired: true, + gateStatus: 'pass', + reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered', + allowedResponseModesIncludes: 'button_path', + requiredEvidenceLength: 0, + }, + }, +]; + +function runGate(input) { + const result = spawnSync(process.execPath, [gateScript, '--compact'], { + input: JSON.stringify(input), + encoding: 'utf8', + }); + + if (result.status !== 0) { + throw new Error(`gate script failed with status=${result.status}: ${result.stderr || result.stdout}`); + } + + let parsed; + try { + parsed = JSON.parse(result.stdout); + } catch (error) { + throw new Error(`gate script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`); + } + + return parsed; +} + +function requireCoreFields(output) { + assert.equal(typeof output.gateRequired, 'boolean', 'gateRequired should be boolean'); + assert.equal(typeof output.gateStatus, 'string', 'gateStatus should be string'); + assert.ok(Array.isArray(output.reasons), 'reasons should be an array'); + assert.ok(Array.isArray(output.requiredEvidence), 'requiredEvidence should be an array'); + assert.ok(Array.isArray(output.allowedResponseModes), 'allowedResponseModes should be an array'); +} + +function assertScenario(output, expected) { + assert.equal(output.gateRequired, expected.gateRequired, 'gateRequired mismatch'); + assert.equal(output.gateStatus, expected.gateStatus, 'gateStatus mismatch'); + + if (expected.reasonIncludes) { + assert.ok( + output.reasons.some((reason) => reason.includes(expected.reasonIncludes)), + `expected reasons to include: ${expected.reasonIncludes}`, + ); + } + + if (expected.allowedResponseModesIncludes) { + assert.ok( + output.allowedResponseModes.includes(expected.allowedResponseModesIncludes), + `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludes}`, + ); + } + + if (expected.allowedResponseModesIncludesAlso) { + assert.ok( + output.allowedResponseModes.includes(expected.allowedResponseModesIncludesAlso), + `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludesAlso}`, + ); + } + + if (typeof expected.requiredEvidenceLength === 'number') { + assert.equal( + output.requiredEvidence.length, + expected.requiredEvidenceLength, + 'requiredEvidence length mismatch', + ); + } + + if (expected.requiredEvidenceKey) { + assert.ok( + output.requiredEvidence.some((entry) => entry && entry.evidenceKey === expected.requiredEvidenceKey), + `expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`, + ); + } +} + +const results = []; +let failed = false; + +for (const scenario of scenarios) { + try { + const output = runGate(scenario.input); + requireCoreFields(output); + assertScenario(output, scenario.expected); + + results.push({ + scenario: scenario.name, + ok: true, + gateRequired: output.gateRequired, + gateStatus: output.gateStatus, + reasons: output.reasons, + requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry.evidenceKey), + allowedResponseModes: output.allowedResponseModes, + assertion: 'pass', + }); + } catch (error) { + failed = true; + results.push({ + scenario: scenario.name, + ok: false, + assertion: 'fail', + error: error instanceof Error ? error.message : String(error), + }); + } +} + +const summary = { + total: results.length, + passed: results.filter((entry) => entry.ok).length, + failed: results.filter((entry) => !entry.ok).length, +}; + +process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`); + +if (failed) process.exit(1);