#!/usr/bin/env node
/**
 * Integration test for the force-recall hook handler.
 *
 * Loads `hooks/force-recall/handler.ts`, runs it against synthetic
 * `message/preprocessed` events, and asserts on the text the handler leaves in
 * `event.context.bodyForAgent`. Several scenarios temporarily overwrite the
 * governor wrapper script (and, once, the gate-lock script) on disk and restore
 * the original afterwards. `evaluateGate` from the gate-lock script is also
 * exercised directly.
 *
 * On success a JSON summary is written to stdout; on failure the error is
 * printed and the process exit code is set to 1.
 */
import assert from 'node:assert/strict';
import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { stripTypeScriptTypes } from 'node:module';

// fileURLToPath (rather than URL#pathname) decodes percent-escapes and yields a
// valid native path on Windows, so the derived repo paths are always usable.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const repoRoot = path.resolve(__dirname, '..');
const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
const plannerPath = path.join(repoRoot, 'scripts', 'plan_long_task_auto_chain.mjs');

/**
 * Reads a TypeScript file, strips its type annotations and imports the result
 * through a `data:` URL.
 *
 * @param {string} tsPath - Path of the TypeScript module to load.
 * @returns {Promise<object>} The imported module namespace.
 */
async function importTsModule(tsPath) {
  const source = await fs.readFile(tsPath, 'utf8');
  const jsSource = stripTypeScriptTypes(source, { mode: 'strip' });
  // NOTE(review): the `#` in the sourceURL suffix is not percent-encoded, so it
  // presumably starts the URL fragment and the sourceURL comment may never reach
  // the module source - confirm before relying on it for stack traces.
  const dataUrl = `data:text/javascript;charset=utf-8,${encodeURIComponent(jsSource)}\n//# sourceURL=${encodeURIComponent(pathToFileURL(tsPath).href)}`;
  return import(dataUrl);
}

/**
 * Escapes regular-expression metacharacters so the snippet matches literally.
 *
 * @param {string} snippet - Literal text to embed in a RegExp.
 * @returns {string} The snippet with metacharacters backslash-escaped.
 */
function escapeRegex(snippet) {
  return snippet.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Invokes the hook handler with a synthetic `message/preprocessed` event and
 * returns whatever the event carries in `context.bodyForAgent` afterwards.
 *
 * @param {Function} forceRecall - The handler's default export.
 * @param {string} requestText - Text used for both `body` and `bodyForAgent`.
 * @returns {Promise<string>} The value of `event.context.bodyForAgent` after the handler ran.
 * @throws {assert.AssertionError} If `bodyForAgent` is not a string afterwards.
 */
async function runScenario(forceRecall, requestText) {
  const event = {
    type: 'message',
    action: 'preprocessed',
    context: {
      workspaceDir: repoRoot,
      body: requestText,
      bodyForAgent: requestText,
    },
  };
  await forceRecall(event);
  const injected = event.context?.bodyForAgent;
  assert.equal(typeof injected, 'string', 'event.context.bodyForAgent should be a string after handler runs');
  return injected;
}

/**
 * Temporarily replaces the wrapper script on disk while `callback` runs, then
 * writes the original content back.
 *
 * @param {string} tempContent - Replacement source for the wrapper script.
 * @param {Function} callback - Async function executed while the patch is in place.
 * @returns {Promise<*>} Whatever `callback` resolves to.
 */
async function withPatchedWrapper(tempContent, callback) {
  const originalWrapper = await fs.readFile(wrapperPath, 'utf8');
  try {
    // The write is inside the try so that a failed or partial write is still
    // followed by restoring the original wrapper.
    await fs.writeFile(wrapperPath, tempContent, 'utf8');
    return await callback();
  } finally {
    await fs.writeFile(wrapperPath, originalWrapper, 'utf8');
  }
}

/**
 * Builds the source of a stand-in wrapper script that prints `wrapperResult`
 * as a single line of JSON on stdout.
 *
 * @param {object} wrapperResult - JSON-serialisable value the script should emit.
 * @returns {string} Source text of the stand-in script.
 */
function buildWrapperScript(wrapperResult) {
  return `#!/usr/bin/env node\nprocess.stdout.write(JSON.stringify(${JSON.stringify(wrapperResult)}, null, 0) + "\\n");\n`;
}

/**
 * Runs every scenario in order and prints a JSON summary when all assertions
 * pass.
 *
 * @returns {Promise<void>}
 * @throws {Error} On the first failed assertion or file-system error.
 */
async function main() {
  // Fail early if any of the scripts this test depends on is missing.
  await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath), fs.access(plannerPath)]);

  const { default: forceRecall } = await importTsModule(handlerPath);
  assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');

  const requestText = [
    'Please inspect the workspace files and verify the hook injection path.',
    'I need you to review the behavior, choose the final accept/reject decision,',
    'and continue in background with a follow-up later.',
  ].join(' ');
  const plannerOnlyRequestText = [
    'Please inspect the workspace files and verify the hook injection path.',
    'Summarize the current dry-run planner state for technical inspection only.',
  ].join(' ');

  // --- Unpatched wrapper ---------------------------------------------------
  const realWrapperInjected = await runScenario(forceRecall, 'Dispatch a subagent to inspect logs and wait for the result.');
  assert.match(realWrapperInjected, /classification=long_task/, 'real wrapper integration should classify subagent wait as long_task');
  assert.match(realWrapperInjected, /gateStatus=pass/, 'real wrapper integration should pass gate with real progress evidence');
  assert.match(realWrapperInjected, /allowedResponseMode=silent_continuation/, 'real wrapper integration should preserve silent continuation allowance');
  assert.doesNotMatch(realWrapperInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'real wrapper integration should not fail for missing progress evidence');
  assert.doesNotMatch(realWrapperInjected, /requiredEvidence=progressEvidence/, 'real wrapper integration should not require synthetic progressEvidence repair');
  assert.doesNotMatch(realWrapperInjected, /task_name/, 'real wrapper integration should not leak taskRecord.task_name fallback into gate/preflight text');

  const injected = await runScenario(forceRecall, requestText);
  const expectedSnippets = [
    '[LONG_TASK_GOVERNOR_PREFLIGHT]',
    'classification=long_task',
    'silentLaunchOk=false',
    'handoff.mode=button_path',
    '[LONG_TASK_GATE_LOCK]',
    'gateStatus=fail',
    '[LONG_TASK_AUTO_CHAIN_PLAN]',
    'plannerStatus=blocked_by_gate',
    'derivedAction=none',
    'dispatchMode=no_dispatch',
    'autoChainAllowed=false',
    'reason=gateStatus must pass before auto-chain planning can proceed',
    'requiredEvidence=gateStatus=pass',
    'requiredEvidence=externalizedCheckpoint',
    'requiredEvidence=concreteNextAction',
    'requiredEvidence=buttonPathMode',
    'reason=silent long-task cannot continue without externalized checkpoint path',
    'reason=claimed execution requires evidence of a concrete next action',
    'reason=owner decision flow must end in button-path, not plain text',
    'ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.',
    'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
    'HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.',
    'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
    'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
  ];
  const unexpectedSnippets = [
    'reason=claimed progression without concrete progress evidence is forbidden',
    'requiredEvidence=progressEvidence',
  ];
  for (const snippet of expectedSnippets) {
    assert.match(injected, new RegExp(escapeRegex(snippet)), `missing snippet: ${snippet}`);
  }
  for (const snippet of unexpectedSnippets) {
    assert.doesNotMatch(injected, new RegExp(escapeRegex(snippet)), `unexpected snippet present: ${snippet}`);
  }

  // --- Direct evaluateGate checks ------------------------------------------
  // The timestamp query string makes the specifier unique for this run.
  const { evaluateGate } = await import(`${pathToFileURL(gateLockPath).href}?t=${Date.now()}`);
  assert.equal(typeof evaluateGate, 'function', 'long_task_gate_lock should export evaluateGate for direct tests');

  const passResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
    autoChainNextAction: 'dispatch_follow_up_subagent',
    autoChainDispatchEvidence: {
      action: 'dispatch_follow_up_subagent',
      dispatched: true,
      event: 'dispatch',
    },
    progressionClaim: 'already progressing to the next step in background',
    progressEvidence: { sessionKey: 'task-123' },
  });
  assert.equal(passResult.gateStatus, 'pass', 'pass-path should pass with concrete progressEvidence');

  const failResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
    autoChainNextAction: 'dispatch_follow_up_subagent',
    progressionClaim: 'already progressing to the next step in background',
    executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' },
  });
  assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when explicit auto-chain action lacks dispatch evidence');
  assert.match(JSON.stringify(failResult), /autoChainDispatchEvidence/, 'fail-path should require autoChainDispatchEvidence');

  const neutralResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'summarize findings for reply',
    executionEvidence: { concreteNextAction: 'summarize findings for reply' },
  });
  assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no explicit auto-chain next action');
  assert.doesNotMatch(JSON.stringify(neutralResult), /autoChainDispatchEvidence/, 'neutral-path should not require auto-chain dispatch evidence');

  const directAutoChainFailResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
    autoChainNextAction: 'dispatch_follow_up_subagent',
  });
  assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
  assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');

  const mismatchedDispatchEvidenceResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
    autoChainNextAction: 'dispatch_follow_up_subagent',
    autoChainDispatchEvidence: {
      action: 'dispatch_other_subagent',
      dispatched: true,
      event: 'dispatch',
    },
  });
  assert.equal(mismatchedDispatchEvidenceResult.gateStatus, 'fail', 'mismatched dispatch evidence should fail');
  assert.match(JSON.stringify(mismatchedDispatchEvidenceResult), /autoChainDispatchEvidence/, 'mismatched dispatch evidence should still require matching autoChainDispatchEvidence');

  const fakeCheckpointDispatchEvidenceResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'dispatch_follow_up_subagent',
    autoChainNextAction: 'dispatch_follow_up_subagent',
    autoChainDispatchEvidence: {
      sessionKey: 'task-123',
      checkpointPath: 'checkpoints/task-123.json',
    },
  });
  assert.equal(fakeCheckpointDispatchEvidenceResult.gateStatus, 'fail', 'checkpoint/session-only dispatch evidence should fail');
  assert.match(JSON.stringify(fakeCheckpointDispatchEvidenceResult), /explicit auto-chain next action requires dispatched-action evidence/, 'checkpoint/session-only dispatch evidence should be rejected as fake dispatch evidence');

  const neutralSnakeCaseResult = evaluateGate({
    classification: 'long_task',
    claimedExecution: true,
    concreteNextAction: 'summarize findings for reply',
    autoChainNextAction: 'checkpoint_session_metadata_only',
    executionEvidence: { concreteNextAction: 'summarize findings for reply' },
  });
  assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
  assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');

  // --- Hook scenarios with a stand-in wrapper -------------------------------
  const passInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: true,
    needsCheckpoint: true,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: true,
    silentLaunchReason: 'checkpoint established',
    requiredNextAction: 'dispatch_follow_up_subagent',
    autoChainDispatchEvidence: {
      action: 'dispatch_follow_up_subagent',
      dispatched: true,
      event: 'dispatch',
    },
    progressEvidence: { sessionKey: 'task-123' },
    externalizedCheckpointPath: 'checkpoints/task-123.json',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
  assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
  assert.match(passInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook pass-path should emit auto-chain plan block');
  assert.match(passInjected, /plannerStatus=pass/, 'hook pass-path should expose planner pass result');
  assert.match(passInjected, /derivedAction=dispatch_spec_review/, 'hook pass-path should derive dry-run spec review dispatch');
  assert.match(passInjected, /dispatchMode=dry_run_dispatch/, 'hook pass-path should stay in dry-run dispatch mode');
  assert.match(passInjected, /autoChainAllowed=true/, 'hook pass-path should allow auto-chain in dry-run planner output');

  const failInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: false,
    needsCheckpoint: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: false,
    requiredNextAction: 'dispatch_follow_up_subagent',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
  assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper exposes explicit auto-chain action without dispatch evidence');
  assert.match(failInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fail-path should emit auto-chain plan block');
  assert.match(failInjected, /plannerStatus=blocked_by_gate/, 'hook fail-path should report planner blocked by gate');
  assert.match(failInjected, /derivedAction=none/, 'hook fail-path should not derive a dry-run action');
  assert.match(failInjected, /dispatchMode=no_dispatch/, 'hook fail-path should remain no-dispatch');
  assert.match(failInjected, /autoChainAllowed=false/, 'hook fail-path should not allow auto-chain');
  assert.match(failInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook fail-path should mention missing dispatched-action evidence');
  assert.match(failInjected, /requiredEvidence=autoChainDispatchEvidence/, 'hook fail-path should require autoChainDispatchEvidence');

  const neutralInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: false,
    needsCheckpoint: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: false,
    requiredNextAction: 'summarize findings for reply',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
  assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not expose an explicit auto-chain action');
  assert.match(neutralInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook neutral-path should emit auto-chain plan block');
  assert.match(neutralInjected, /plannerStatus=none/, 'hook neutral-path should report no derived auto-chain action');
  assert.match(neutralInjected, /derivedAction=none/, 'hook neutral-path should keep derivedAction as none');
  assert.match(neutralInjected, /dispatchMode=no_dispatch/, 'hook neutral-path should remain no-dispatch');
  assert.match(neutralInjected, /autoChainAllowed=false/, 'hook neutral-path should keep auto-chain disabled');
  assert.doesNotMatch(neutralInjected, /reason=explicit auto-chain next action requires dispatched-action evidence/, 'hook neutral-path should not fail on auto-chain evidence when no explicit tool action exists');

  const fakeProgressEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: true,
    needsCheckpoint: true,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: true,
    silentLaunchReason: 'task name exists but no externalized artifact',
    taskRecord: { task_name: 'descriptive-task-name-only' },
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, requestText));
  assert.match(fakeProgressEvidenceInjected, /gateStatus=fail/, 'hook fake-progress-evidence path should fail when only task_name exists');
  assert.match(fakeProgressEvidenceInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fake-progress-evidence path should mention missing concrete progress evidence');
  assert.match(fakeProgressEvidenceInjected, /requiredEvidence=progressEvidence/, 'hook fake-progress-evidence path should require progressEvidence');
  assert.match(fakeProgressEvidenceInjected, /reason=silent long-task cannot continue without externalized checkpoint path/, 'hook fake-progress-evidence path should also require real checkpoint evidence');

  const specReviewWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: false,
    needsCheckpoint: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: true,
    requiredNextAction: 'dispatch_code_quality_review',
    autoChainDispatchEvidence: {
      action: 'dispatch_code_quality_review',
      dispatched: true,
      event: 'dispatch',
    },
    progressEvidence: { sessionKey: 'task-spec-review-missing-evidence' },
    externalizedCheckpointPath: 'checkpoints/task-spec-review-missing-evidence.json',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, plannerOnlyRequestText));
  assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block');
  assert.match(specReviewWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook spec-review missing-evidence path should block on missing evidence');
  assert.match(specReviewWithoutEvidenceInjected, /derivedAction=none/, 'hook spec-review missing-evidence path should not derive a dry-run action');
  assert.match(specReviewWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook spec-review missing-evidence path should stay no-dispatch');
  assert.match(specReviewWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook spec-review missing-evidence path should not allow auto-chain');
  assert.match(specReviewWithoutEvidenceInjected, /reason=review pass evidence missing for code quality review transition/, 'hook spec-review missing-evidence path should mention missing review evidence');
  assert.match(specReviewWithoutEvidenceInjected, /requiredEvidence=reviewEvidence/, 'hook spec-review missing-evidence path should require reviewEvidence');

  const fixSliceWithoutEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: false,
    needsCheckpoint: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: true,
    silentLaunchReason: 'review blocked by findings',
    requiredNextAction: 'dispatch_fix_slice',
    autoChainDispatchEvidence: {
      action: 'dispatch_fix_slice',
      dispatched: true,
      event: 'dispatch',
    },
    progressEvidence: { sessionKey: 'task-fix-slice-missing-evidence' },
    externalizedCheckpointPath: 'checkpoints/task-fix-slice-missing-evidence.json',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, plannerOnlyRequestText));
  assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block');
  assert.match(fixSliceWithoutEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook fix-slice missing-evidence path should block on missing evidence');
  assert.match(fixSliceWithoutEvidenceInjected, /derivedAction=none/, 'hook fix-slice missing-evidence path should not derive a dry-run action');
  assert.match(fixSliceWithoutEvidenceInjected, /dispatchMode=no_dispatch/, 'hook fix-slice missing-evidence path should stay no-dispatch');
  assert.match(fixSliceWithoutEvidenceInjected, /autoChainAllowed=false/, 'hook fix-slice missing-evidence path should not allow auto-chain');
  assert.match(fixSliceWithoutEvidenceInjected, /reason=blocker evidence missing for retry\/fix transition/, 'hook fix-slice missing-evidence path should mention missing blocker evidence');
  assert.match(fixSliceWithoutEvidenceInjected, /requiredEvidence=blockerEvidence/, 'hook fix-slice missing-evidence path should require blockerEvidence');

  const specReviewWithoutImplementationEvidenceInjected = await withPatchedWrapper(buildWrapperScript({
    classification: 'long_task',
    silentCandidate: false,
    needsCheckpoint: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    silentLaunchOk: true,
    requiredNextAction: 'dispatch_spec_review',
    autoChainDispatchEvidence: {
      action: 'dispatch_spec_review',
      dispatched: true,
      event: 'dispatch',
    },
    progressEvidence: { sessionKey: 'task-implementation-missing-evidence' },
    externalizedCheckpointPath: 'checkpoints/task-implementation-missing-evidence.json',
    handoff: { mode: 'direct_reply' },
  }), async () => runScenario(forceRecall, plannerOnlyRequestText));
  assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /plannerStatus=blocked_by_evidence/, 'hook implementation missing-evidence path should block on missing evidence');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /derivedAction=none/, 'hook implementation missing-evidence path should not derive a dry-run action');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /dispatchMode=no_dispatch/, 'hook implementation missing-evidence path should stay no-dispatch');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /autoChainAllowed=false/, 'hook implementation missing-evidence path should not allow auto-chain');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /reason=implementation evidence missing for review-required next action/, 'hook implementation missing-evidence path should mention missing implementation evidence');
  assert.match(specReviewWithoutImplementationEvidenceInjected, /requiredEvidence=executionEvidence/, 'hook implementation missing-evidence path should require executionEvidence');

  // --- Degraded gate lock ---------------------------------------------------
  // Replace the gate-lock script with one that exits with status 1, keeping an
  // on-disk copy of the original in a temp directory while the stub is active.
  const originalGateLock = await fs.readFile(gateLockPath, 'utf8');
  const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-'));
  const backupPath = path.join(tempDir, path.basename(gateLockPath));
  let degradedInjected;
  try {
    // Both writes are inside the try so that a failure in either still
    // restores the gate lock and removes the temp directory.
    await fs.writeFile(backupPath, originalGateLock, 'utf8');
    await fs.writeFile(gateLockPath, '#!/usr/bin/env node\nprocess.exit(1);\n', 'utf8');
    degradedInjected = await runScenario(forceRecall, requestText);
  } finally {
    // Restore from the in-memory copy so recovery does not depend on the
    // backup file being readable; the backup is only deleted after the
    // restore has succeeded.
    await fs.writeFile(gateLockPath, originalGateLock, 'utf8');
    await fs.rm(tempDir, { recursive: true, force: true });
  }

  const degradedExpectedSnippets = [
    '[LONG_TASK_GATE_LOCK]',
    'gateStatus=degraded',
    'gateRequired=unknown',
    'HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable progress evidence.',
    'HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.',
  ];
  for (const snippet of degradedExpectedSnippets) {
    assert.match(degradedInjected, new RegExp(escapeRegex(snippet)), `missing degraded snippet: ${snippet}`);
  }

  process.stdout.write(JSON.stringify({
    ok: true,
    gatePaths: {
      pass: passResult.gateStatus,
      fail: failResult.gateStatus,
      neutral: neutralResult.gateStatus,
    },
    bodyPreview: injected.split('\n').slice(0, 35),
  }, null, 2) + '\n');
}

main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});