From 46fa3b8d49c8fef629251f35532bd85a842be916 Mon Sep 17 00:00:00 2001 From: "openclaw@cowbay.org" Date: Fri, 24 Apr 2026 20:29:08 +0800 Subject: [PATCH] fix continuity clean-room install verification --- hooks/force-recall/handler.ts | 54 ++- .../continuity/src/adapters/force-recall.mjs | 32 +- plugins/continuity/src/config/defaults.mjs | 4 + plugins/continuity/src/config/schema.mjs | 68 +-- .../continuity/src/continuity/evaluator.mjs | 2 + plugins/continuity/src/continuity/types.md | 43 +- plugins/continuity/src/index.mjs | 25 ++ .../test/continuity.config.test.mjs | 50 +++ .../test/continuity.plugin.test.mjs | 43 ++ .../continuity/test/continuity.smoke.test.mjs | 23 ++ scripts/long_task_gate_lock.mjs | 390 ++++++++++++++++++ scripts/long_task_governor_wrapper.mjs | 261 ++++++++++++ scripts/plan_long_task_auto_chain.mjs | 182 ++++++++ .../test_force_recall_long_task_preflight.mjs | 17 +- scripts/test_long_task_gate_lock.mjs | 197 +++++++++ scripts/test_long_task_governor_wrapper.mjs | 179 ++++++++ scripts/test_plan_long_task_auto_chain.mjs | 240 +++++++++++ 17 files changed, 1765 insertions(+), 45 deletions(-) create mode 100644 scripts/long_task_gate_lock.mjs create mode 100644 scripts/long_task_governor_wrapper.mjs create mode 100644 scripts/plan_long_task_auto_chain.mjs create mode 100644 scripts/test_long_task_gate_lock.mjs create mode 100644 scripts/test_long_task_governor_wrapper.mjs create mode 100644 scripts/test_plan_long_task_auto_chain.mjs diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index 9c229ff..654b3db 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -394,14 +394,37 @@ const continuityAdapterModuleCache = new Map { const adapterPath = path.join(workspaceDir, "plugins", "continuity", "src", "index.mjs"); - let modulePromise = continuityAdapterModuleCache.get(adapterPath); - if (!modulePromise) { - modulePromise = import(pathToFileURL(adapterPath).href).catch(() => null); - continuityAdapterModuleCache.set(adapterPath, modulePromise); + try { + const stat = await fs.stat(adapterPath); + const cacheKey = `${adapterPath}?mtimeMs=${stat.mtimeMs}`; + let modulePromise = continuityAdapterModuleCache.get(cacheKey); + + if (!modulePromise) { + modulePromise = import(pathToFileURL(adapterPath).href + `?mtimeMs=${stat.mtimeMs}`).catch(() => null); + continuityAdapterModuleCache.set(cacheKey, modulePromise); + } + + return modulePromise; + } catch { + return null; } +} - return modulePromise; +async function readContinuityPluginConfigOverrides(workspaceDir: string): Promise> { + const defaultsPath = path.join(workspaceDir, "plugins", "continuity", "src", "config", "defaults.mjs"); + const source = await safeReadText(defaultsPath); + if (!source) return {}; + + const forceRecallLabel = source.match(/forceRecall:\s*\{[\s\S]*?injectBlockLabel:\s*['"]([^'"]+)['"]/); + const genericPreflightLabel = source.match(/genericPreflight:\s*\{[\s\S]*?injectBlockLabel:\s*['"]([^'"]+)['"]/); + + return { + adapter: { + forceRecall: forceRecallLabel ? { injectBlockLabel: forceRecallLabel[1] } : {}, + genericPreflight: genericPreflightLabel ? { injectBlockLabel: genericPreflightLabel[1] } : {}, + }, + }; } async function evaluateApprovedPlanContinuityViaPlugin(workspaceDir: string, wrapperResult: any, autoChainPlanResult: AutoChainPlanResult | null): Promise<{ input: Record | null; result: ApprovedPlanContinuityResult | null; block: string; } | null> { @@ -409,10 +432,27 @@ async function evaluateApprovedPlanContinuityViaPlugin(workspaceDir: string, wra const runAdapter = adapterModule?.runForceRecallContinuityAdapter; if (typeof runAdapter !== "function") return null; + const configOverrides = await readContinuityPluginConfigOverrides(workspaceDir); + return runAdapter({ wrapperResult, autoChainPlanResult, - config: adapterModule?.defaultConfig ?? {}, + config: { + ...(adapterModule?.defaultConfig ?? {}), + ...configOverrides, + adapter: { + ...(((adapterModule?.defaultConfig ?? {}) as any)?.adapter ?? {}), + ...((configOverrides as any)?.adapter ?? {}), + forceRecall: { + ...((((adapterModule?.defaultConfig ?? {}) as any)?.adapter?.forceRecall) ?? {}), + ...(((configOverrides as any)?.adapter?.forceRecall) ?? {}), + }, + genericPreflight: { + ...((((adapterModule?.defaultConfig ?? {}) as any)?.adapter?.genericPreflight) ?? {}), + ...(((configOverrides as any)?.adapter?.genericPreflight) ?? {}), + }, + }, + }, }); } @@ -444,6 +484,8 @@ async function buildApprovedPlanContinuityBlock(workspaceDir: string, wrapperRes if (result.reason === 'missing_auto_next_dispatch') { lines.push("- HARD_GATE: Do not stop at this completed-task boundary."); lines.push("- HARD_GATE: Auto-dispatch the next task in the same approved plan, unless waiting_user, blocked, pending_verification, or high-risk stop applies."); + lines.push("- HARD_GATE: Do not hand control back to the user with an ordinary progress update while auto-next is still obligatory."); + lines.push("- HARD_GATE: If you cannot prove the next dispatch, convert this into an explicit continuity failure instead of a normal status report."); } else { lines.push("- HARD_GATE: Route back to continuity failure until a real next dispatch receipt exists, unless closure state is waiting_user, blocked, or pending_verification."); } diff --git a/plugins/continuity/src/adapters/force-recall.mjs b/plugins/continuity/src/adapters/force-recall.mjs index 7392db8..ab6bb73 100644 --- a/plugins/continuity/src/adapters/force-recall.mjs +++ b/plugins/continuity/src/adapters/force-recall.mjs @@ -1,3 +1,7 @@ +import { + normalizeContinuityEngineInput, + createContinuityEngineContract, +} from '../continuity/engine.mjs'; import { evaluateContinuity, buildContinuityGateBlock } from '../continuity/evaluator.mjs'; function isNonEmptyString(value) { @@ -29,7 +33,7 @@ export function buildApprovedPlanContinuityInput(wrapperResult, autoChainPlanRes const taskBoundaryStop = wrapperResult?.taskBoundaryStop === true || replyClosureState === 'completed'; const highRiskStop = wrapperResult?.highRiskStop === true; - return { + return normalizeContinuityEngineInput({ planId: wrapperResult?.planId ?? 'hook-preflight-approved-plan', currentTask: wrapperResult?.currentTask ?? wrapperResult?.requiredNextAction ?? 'hook-preflight-task', taskState: wrapperResult?.taskState ?? (plannerDerivedAction ? 'complete' : null), @@ -40,7 +44,11 @@ export function buildApprovedPlanContinuityInput(wrapperResult, autoChainPlanRes sameApprovedPlan, taskBoundaryStop, highRiskStop, - }; + metadata: { + adapterSource: 'force-recall', + classification: wrapperResult?.classification ?? null, + }, + }); } export function createForceRecallContinuityAdapter(config = {}) { @@ -50,10 +58,22 @@ export function createForceRecallContinuityAdapter(config = {}) { return { evaluate({ wrapperResult, autoChainPlanResult = null }) { const input = buildApprovedPlanContinuityInput(wrapperResult, autoChainPlanResult); - if (!input) return { input: null, result: null, block: '' }; - const result = evaluateContinuity(input, { legalTerminalStates }); - const block = buildContinuityGateBlock(result, { legalTerminalStates, label }); - return { input, result, block }; + if (!input) { + return createContinuityEngineContract({ + input: null, + evaluation: null, + block: '', + options: { adapterName: 'force-recall', label }, + }); + } + const evaluation = evaluateContinuity(input, { legalTerminalStates }); + const block = buildContinuityGateBlock(evaluation, { legalTerminalStates, label }); + return createContinuityEngineContract({ + input, + evaluation, + block, + options: { adapterName: 'force-recall', label }, + }); }, }; } diff --git a/plugins/continuity/src/config/defaults.mjs b/plugins/continuity/src/config/defaults.mjs index 0ed8484..d42555f 100644 --- a/plugins/continuity/src/config/defaults.mjs +++ b/plugins/continuity/src/config/defaults.mjs @@ -11,6 +11,10 @@ export const defaultConfig = Object.freeze({ enabled: true, injectBlockLabel: 'APPROVED_PLAN_CONTINUITY_GATE', }, + genericPreflight: { + enabled: true, + injectBlockLabel: 'APPROVED_PLAN_CONTINUITY_GATE', + }, }, }); diff --git a/plugins/continuity/src/config/schema.mjs b/plugins/continuity/src/config/schema.mjs index f6f069d..2ea3f51 100644 --- a/plugins/continuity/src/config/schema.mjs +++ b/plugins/continuity/src/config/schema.mjs @@ -13,6 +13,10 @@ export const continuityConfigSchema = Object.freeze({ enabled: 'boolean', injectBlockLabel: 'string', }, + genericPreflight: { + enabled: 'boolean', + injectBlockLabel: 'string', + }, }, }); @@ -27,8 +31,8 @@ const TOP_LEVEL_KEYS = new Set([ 'adapter', ]); -const ADAPTER_KEYS = new Set(['forceRecall']); -const FORCE_RECALL_KEYS = new Set(['enabled', 'injectBlockLabel']); +const ADAPTER_KEYS = new Set(['forceRecall', 'genericPreflight']); +const ADAPTER_CONFIG_KEYS = new Set(['enabled', 'injectBlockLabel']); function isPlainObject(value) { return Boolean(value) && typeof value === 'object' && !Array.isArray(value); @@ -59,6 +63,35 @@ function validateStringArray(errors, value, fieldName) { }); } +function normalizeAdapterConfig(baseAdapterConfig, inputAdapterConfig) { + return { + ...baseAdapterConfig, + ...(isPlainObject(inputAdapterConfig) ? inputAdapterConfig : {}), + injectBlockLabel: typeof inputAdapterConfig?.injectBlockLabel === 'string' + ? inputAdapterConfig.injectBlockLabel.trim() + : baseAdapterConfig.injectBlockLabel, + }; +} + +function validateNamedAdapter(errors, adapterInput, adapterKey) { + if (!(adapterKey in adapterInput)) return; + + if (!isPlainObject(adapterInput[adapterKey])) { + errors.push(`adapter.${adapterKey}: expected object`); + return; + } + + pushUnknownKeyErrors(errors, adapterInput[adapterKey], ADAPTER_CONFIG_KEYS, `adapter.${adapterKey}.`); + + if ('enabled' in adapterInput[adapterKey] && typeof adapterInput[adapterKey].enabled !== 'boolean') { + errors.push(`adapter.${adapterKey}.enabled: expected boolean`); + } + + if ('injectBlockLabel' in adapterInput[adapterKey] && !isNonEmptyString(adapterInput[adapterKey].injectBlockLabel)) { + errors.push(`adapter.${adapterKey}.injectBlockLabel: expected non-empty string`); + } +} + export function normalizeContinuityConfig(input = {}) { const base = cloneDefaultConfig(); @@ -79,17 +112,11 @@ export function normalizeContinuityConfig(input = {}) { adapter: { ...base.adapter, ...(isPlainObject(input.adapter) ? input.adapter : {}), - forceRecall: { - ...base.adapter.forceRecall, - ...(isPlainObject(input.adapter?.forceRecall) ? input.adapter.forceRecall : {}), - }, + forceRecall: normalizeAdapterConfig(base.adapter.forceRecall, input.adapter?.forceRecall), + genericPreflight: normalizeAdapterConfig(base.adapter.genericPreflight, input.adapter?.genericPreflight), }, }; - if (typeof normalized.adapter.forceRecall.injectBlockLabel === 'string') { - normalized.adapter.forceRecall.injectBlockLabel = normalized.adapter.forceRecall.injectBlockLabel.trim(); - } - return normalized; } @@ -140,25 +167,8 @@ export function validateContinuityConfig(input = {}) { errors.push('adapter: expected object'); } else { pushUnknownKeyErrors(errors, input.adapter, ADAPTER_KEYS, 'adapter.'); - - if ('forceRecall' in input.adapter) { - if (!isPlainObject(input.adapter.forceRecall)) { - errors.push('adapter.forceRecall: expected object'); - } else { - pushUnknownKeyErrors(errors, input.adapter.forceRecall, FORCE_RECALL_KEYS, 'adapter.forceRecall.'); - - if ('enabled' in input.adapter.forceRecall && typeof input.adapter.forceRecall.enabled !== 'boolean') { - errors.push('adapter.forceRecall.enabled: expected boolean'); - } - - if ( - 'injectBlockLabel' in input.adapter.forceRecall - && !isNonEmptyString(input.adapter.forceRecall.injectBlockLabel) - ) { - errors.push('adapter.forceRecall.injectBlockLabel: expected non-empty string'); - } - } - } + validateNamedAdapter(errors, input.adapter, 'forceRecall'); + validateNamedAdapter(errors, input.adapter, 'genericPreflight'); } } diff --git a/plugins/continuity/src/continuity/evaluator.mjs b/plugins/continuity/src/continuity/evaluator.mjs index f2f4f87..aada80d 100644 --- a/plugins/continuity/src/continuity/evaluator.mjs +++ b/plugins/continuity/src/continuity/evaluator.mjs @@ -110,6 +110,8 @@ export function buildContinuityGateBlock(result, options = {}) { if (result.reason === 'missing_auto_next_dispatch') { lines.push('- HARD_GATE: Do not stop at this completed-task boundary.'); lines.push(`- HARD_GATE: Auto-dispatch the next task in the same approved plan, unless ${terminalStates.join(', ')}, or high-risk stop applies.`); + lines.push('- HARD_GATE: Do not hand control back to the user with an ordinary progress update while auto-next is still obligatory.'); + lines.push('- HARD_GATE: If you cannot prove the next dispatch, convert this into an explicit continuity failure instead of a normal status report.'); } else { lines.push(`- HARD_GATE: Route back to continuity failure until a real next dispatch receipt exists, unless closure state is ${terminalStates.join(', ')}.`); } diff --git a/plugins/continuity/src/continuity/types.md b/plugins/continuity/src/continuity/types.md index 0af2f78..91b11dd 100644 --- a/plugins/continuity/src/continuity/types.md +++ b/plugins/continuity/src/continuity/types.md @@ -1,4 +1,41 @@ -# Continuity Types (MVP) +# Continuity Types + +## Host-agnostic continuity engine input + +The generalized engine operates on a host-agnostic input object. Adapters are responsible for mapping host or hook specific context into this shape. + +Minimum practical fields: + +- `planId`: string +- `currentTask`: string +- `taskState`: string | null +- `nextDerivedAction`: object | null +- `replyClosureState`: string | null +- `dispatchReceipt`: object | null +- `nextTaskKnown`: boolean +- `sameApprovedPlan`: boolean +- `taskBoundaryStop`: boolean +- `highRiskStop`: boolean + +Optional fields: + +- `nextTaskId`: string | null +- `nextTaskKey`: string | null +- `derivedAction`: object | null +- `metadata`: object + +Normalization entrypoint: + +- `normalizeContinuityEngineInput(input)` + +Engine contract returned by generalized adapters: + +- `input`: normalized engine input or `null` +- `result`: summarized engine result object +- `evaluation`: raw evaluator result or `null` +- `block`: injected prompt block string +- `meta.adapterName`: adapter identifier +- `meta.hostAgnostic`: always `true` ## Receipt contract @@ -20,4 +57,6 @@ The MVP receipt validator contract uses this minimum shape: ## Notes - This contract is intentionally minimal and keeps file I/O separate. -- It mirrors the current approved-plan dispatch receipt fields used by the existing continuity scripts. +- The engine is host-agnostic; host-specific behavior belongs in adapters. +- `force-recall` remains the parity adapter for the current hook path. +- `generic-preflight` is the minimal generalized adapter/runner for non-`force-recall` integration. diff --git a/plugins/continuity/src/index.mjs b/plugins/continuity/src/index.mjs index 0f34656..6938fd4 100644 --- a/plugins/continuity/src/index.mjs +++ b/plugins/continuity/src/index.mjs @@ -4,6 +4,11 @@ import { validateContinuityConfig, normalizeContinuityConfig, } from './config/schema.mjs'; +import { + normalizeContinuityEngineInput, + createContinuityEngineResult, + createContinuityEngineContract, +} from './continuity/engine.mjs'; import { evaluateContinuity, buildContinuityGateBlock, @@ -24,6 +29,12 @@ import { createForceRecallContinuityAdapter, runForceRecallContinuityAdapter, } from './adapters/force-recall.mjs'; +import { + buildGenericContinuityInput, + createGenericPreflightContinuityAdapter, + runGenericPreflightContinuityAdapter, + runManualContinuityPreflight, +} from './adapters/generic-preflight.mjs'; export { defaultConfig, @@ -31,6 +42,9 @@ export { continuityConfigSchema, validateContinuityConfig, normalizeContinuityConfig, + normalizeContinuityEngineInput, + createContinuityEngineResult, + createContinuityEngineContract, evaluateContinuity, buildContinuityGateBlock, hasValidDispatchReceipt, @@ -43,6 +57,10 @@ export { buildApprovedPlanContinuityInput, createForceRecallContinuityAdapter, runForceRecallContinuityAdapter, + buildGenericContinuityInput, + createGenericPreflightContinuityAdapter, + runGenericPreflightContinuityAdapter, + runManualContinuityPreflight, }; export default { @@ -51,6 +69,9 @@ export default { continuityConfigSchema, validateContinuityConfig, normalizeContinuityConfig, + normalizeContinuityEngineInput, + createContinuityEngineResult, + createContinuityEngineContract, evaluateContinuity, buildContinuityGateBlock, hasValidDispatchReceipt, @@ -63,4 +84,8 @@ export default { buildApprovedPlanContinuityInput, createForceRecallContinuityAdapter, runForceRecallContinuityAdapter, + buildGenericContinuityInput, + createGenericPreflightContinuityAdapter, + runGenericPreflightContinuityAdapter, + runManualContinuityPreflight, }; diff --git a/plugins/continuity/test/continuity.config.test.mjs b/plugins/continuity/test/continuity.config.test.mjs index de1c934..6cc221f 100644 --- a/plugins/continuity/test/continuity.config.test.mjs +++ b/plugins/continuity/test/continuity.config.test.mjs @@ -52,6 +52,19 @@ test('normalizes missing fields from defaults', () => { assert.equal(normalized.receiptDir, defaultConfig.receiptDir); assert.deepEqual(normalized.legalTerminalStates, defaultConfig.legalTerminalStates); assert.notEqual(normalized.legalTerminalStates, defaultConfig.legalTerminalStates); + assert.equal(normalized.adapter.genericPreflight.enabled, true); +}); + +test('normalizes generic preflight adapter block label', () => { + const normalized = normalizeContinuityConfig({ + adapter: { + genericPreflight: { + injectBlockLabel: ' CUSTOM_GENERIC_GATE ', + }, + }, + }); + + assert.equal(normalized.adapter.genericPreflight.injectBlockLabel, 'CUSTOM_GENERIC_GATE'); }); test('rejects non-array legalTerminalStates', () => { @@ -94,6 +107,17 @@ test('rejects malformed adapter.forceRecall shape', () => { assert.match(result.errors.join('\n'), /adapter\.forceRecall/); }); +test('rejects malformed adapter.genericPreflight shape', () => { + const result = validateContinuityConfig({ + adapter: { + genericPreflight: false, + }, + }); + + assert.equal(result.ok, false); + assert.match(result.errors.join('\n'), /adapter\.genericPreflight/); +}); + test('rejects malformed adapter.forceRecall.enabled type', () => { const result = validateContinuityConfig({ adapter: { @@ -120,6 +144,32 @@ test('rejects malformed adapter.forceRecall.injectBlockLabel type', () => { assert.match(result.errors.join('\n'), /injectBlockLabel/); }); +test('rejects malformed adapter.genericPreflight.enabled type', () => { + const result = validateContinuityConfig({ + adapter: { + genericPreflight: { + enabled: 'yes', + }, + }, + }); + + assert.equal(result.ok, false); + assert.match(result.errors.join('\n'), /adapter\.genericPreflight\.enabled/); +}); + +test('rejects malformed adapter.genericPreflight.injectBlockLabel type', () => { + const result = validateContinuityConfig({ + adapter: { + genericPreflight: { + injectBlockLabel: 42, + }, + }, + }); + + assert.equal(result.ok, false); + assert.match(result.errors.join('\n'), /adapter\.genericPreflight\.injectBlockLabel/); +}); + test('rejects unknown top-level key', () => { const result = validateContinuityConfig({ unexpected: true, diff --git a/plugins/continuity/test/continuity.plugin.test.mjs b/plugins/continuity/test/continuity.plugin.test.mjs index 31a701e..c11e647 100644 --- a/plugins/continuity/test/continuity.plugin.test.mjs +++ b/plugins/continuity/test/continuity.plugin.test.mjs @@ -1,8 +1,10 @@ import assert from 'node:assert/strict'; import plugin, { createForceRecallContinuityAdapter, + createGenericPreflightContinuityAdapter, defaultConfig, evaluateContinuity, + runManualContinuityPreflight, } from '../src/index.mjs'; function test(name, fn) { @@ -19,6 +21,8 @@ test('index exports plugin surface', () => { assert.equal(plugin.name, '@openclaw/plugin-continuity'); assert.equal(typeof evaluateContinuity, 'function'); assert.equal(defaultConfig.adapter.forceRecall.enabled, true); + assert.equal(defaultConfig.adapter.genericPreflight.enabled, true); + assert.equal(typeof plugin.runGenericPreflightContinuityAdapter, 'function'); }); test('adapter preserves current hook parity for plain wrapper next-action mapping', () => { @@ -36,6 +40,8 @@ test('adapter preserves current hook parity for plain wrapper next-action mappin }); assert.equal(out.result.ok, true); + assert.equal(out.meta.adapterName, 'force-recall'); + assert.equal(out.meta.hostAgnostic, true); assert.match(out.block, /status=pass/); }); @@ -60,4 +66,41 @@ test('adapter fails when planner-derived auto-next boundary exists without dispa assert.match(out.block, /continuity_failure/); }); +test('generic preflight adapter evaluates host-agnostic source payload', () => { + const adapter = createGenericPreflightContinuityAdapter(defaultConfig); + const out = adapter.evaluate({ + planId: 'plan-generic', + currentTask: 'task-generic', + taskState: 'complete', + nextTaskKnown: true, + sameApprovedPlan: true, + taskBoundaryStop: true, + nextTaskId: 'task-next', + nextDerivedAction: { type: 'message_subagent', task: 'continue' }, + replyClosureState: 'completed', + dispatchReceipt: null, + }); + + assert.equal(out.result.ok, false); + assert.equal(out.result.reason, 'missing_auto_next_dispatch'); + assert.equal(out.meta.adapterName, 'generic-preflight'); + assert.equal(out.meta.hostAgnostic, true); + assert.equal(out.input.planId, 'plan-generic'); +}); + +test('manual continuity preflight runner works without force-recall hook', () => { + const out = runManualContinuityPreflight({ + config: defaultConfig, + planId: 'plan-manual', + currentTask: 'task-manual', + taskState: 'complete', + nextDerivedAction: { type: 'message_subagent', task: 'continue' }, + replyClosureState: 'waiting_user', + }); + + assert.equal(out.result.ok, true); + assert.match(out.block, /APPROVED_PLAN_CONTINUITY_GATE/); + assert.equal(out.meta.adapterName, 'generic-preflight'); +}); + console.log('continuity.plugin.test.mjs PASS'); diff --git a/plugins/continuity/test/continuity.smoke.test.mjs b/plugins/continuity/test/continuity.smoke.test.mjs index 2b8bd7d..607f1dd 100644 --- a/plugins/continuity/test/continuity.smoke.test.mjs +++ b/plugins/continuity/test/continuity.smoke.test.mjs @@ -1,6 +1,7 @@ import assert from 'node:assert/strict'; import plugin, { runForceRecallContinuityAdapter, + runGenericPreflightContinuityAdapter, validateContinuityConfig, } from '../src/index.mjs'; @@ -25,4 +26,26 @@ const smoke = runForceRecallContinuityAdapter({ assert.equal(smoke.result.ok, false); assert.equal(smoke.result.reason, 'missing_auto_next_dispatch'); assert.match(smoke.block, /APPROVED_PLAN_CONTINUITY_GATE/); +assert.equal(smoke.meta.adapterName, 'force-recall'); + +const genericSmoke = runGenericPreflightContinuityAdapter({ + config: plugin.defaultConfig, + source: { + planId: 'plan-generic-smoke', + currentTask: 'task-9', + taskState: 'complete', + nextTaskKnown: true, + sameApprovedPlan: true, + taskBoundaryStop: true, + nextTaskId: 'task-10', + nextDerivedAction: { type: 'message_subagent', task: 'continue' }, + replyClosureState: 'completed', + dispatchReceipt: null, + }, +}); + +assert.equal(genericSmoke.result.ok, false); +assert.equal(genericSmoke.result.reason, 'missing_auto_next_dispatch'); +assert.match(genericSmoke.block, /APPROVED_PLAN_CONTINUITY_GATE/); +assert.equal(genericSmoke.meta.adapterName, 'generic-preflight'); console.log('continuity.smoke.test.mjs PASS'); diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs new file mode 100644 index 0000000..61714b1 --- /dev/null +++ b/scripts/long_task_gate_lock.mjs @@ -0,0 +1,390 @@ +#!/usr/bin/env node +import fs from 'fs'; + +const EVIDENCE_FIELDS = Object.freeze({ + externalizedCheckpoint: Object.freeze([ + 'externalizedCheckpointPath', + 'externalizedTrigger', + 'checkpointPath', + ]), + concreteNextAction: Object.freeze([ + 'nextStep', + 'requiredNextAction', + 'concreteNextAction', + ]), + buttonPathMode: Object.freeze([ + 'handoffMode', + 'handoff.mode', + 'replyClosureMode', + ]), + progressionClaim: Object.freeze([ + 'progressionClaim', + 'claimedProgression', + 'statusSummary', + ]), + executionEvidence: Object.freeze([ + 'executionEvidence', + 'toolCallEvidence', + 'dispatchEvidence', + 'fileChangeEvidence', + 'verificationEvidence', + 'checkpointArtifactEvidence', + ]), + autoChainNextAction: Object.freeze([ + 'autoChainNextAction', + 'auto_chain_next_action', + ]), + autoChainDispatchEvidence: Object.freeze([ + 'autoChainDispatchEvidence', + 'auto_chain_dispatch_evidence', + ]), + progressEvidence: Object.freeze([ + 'progressEvidence', + 'progressEvidence.sessionKey', + 'progressEvidence.runId', + 'progressEvidence.modified_files', + 'progressEvidence.verificationResult', + 'sessionKey', + 'runId', + 'modified_files', + 'verificationResult', + ]), +}); + +const GATE_REQUIREMENTS = Object.freeze({ + externalizedCheckpoint: Object.freeze({ + evidenceKey: 'externalizedCheckpoint', + acceptedFields: EVIDENCE_FIELDS.externalizedCheckpoint, + requiredValue: 'non-empty string', + }), + concreteNextAction: Object.freeze({ + evidenceKey: 'concreteNextAction', + acceptedFields: EVIDENCE_FIELDS.concreteNextAction, + requiredValue: 'non-empty string', + }), + buttonPathMode: Object.freeze({ + evidenceKey: 'buttonPathMode', + acceptedFields: EVIDENCE_FIELDS.buttonPathMode, + requiredValue: 'button_path', + }), + executionEvidence: Object.freeze({ + evidenceKey: 'executionEvidence', + acceptedFields: EVIDENCE_FIELDS.executionEvidence, + requiredValue: 'tool call, dispatch, file change, verification output, or checkpoint artifact evidence', + }), + autoChainDispatchEvidence: Object.freeze({ + evidenceKey: 'autoChainDispatchEvidence', + acceptedFields: EVIDENCE_FIELDS.autoChainDispatchEvidence, + requiredValue: 'dispatched-action evidence for the explicit auto-chain next action', + }), + progressEvidence: Object.freeze({ + evidenceKey: 'progressEvidence', + acceptedFields: EVIDENCE_FIELDS.progressEvidence, + requiredValue: 'sessionKey, runId, modified_files, verification result, or equivalent concrete progress evidence', + }), +}); + +function fail(code, message) { + process.stderr.write(`${code}: ${message}\n`); + process.exit(1); +} + +function parseArgs(argv) { + const args = { input: '', pretty: true }; + for (let i = 2; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === '--input') { + const value = argv[i + 1]; + if (!value || value.startsWith('--')) fail('CLI_ERROR', '--input requires a value'); + args.input = value; + i += 1; + } else if (arg === '--compact') { + args.pretty = false; + } else { + fail('CLI_ERROR', `unknown argument: ${arg}`); + } + } + return args; +} + +function readInput(path) { + if (!path || path === '-') return fs.readFileSync(0, 'utf8'); + return fs.readFileSync(path, 'utf8'); +} + +function parseJson(raw) { + try { + return JSON.parse(raw); + } catch { + fail('INVALID_JSON', 'input must be valid JSON'); + } +} + +function isLongTask(input) { + return input.classification === 'long_task'; +} + +function hasNonEmptyString(value) { + return typeof value === 'string' && value.trim().length > 0; +} + +function getPathValue(input, path) { + return path.split('.').reduce((current, key) => { + if (current === null || current === undefined) return undefined; + return current[key]; + }, input); +} + +function hasAnyNonEmptyString(input, fieldPaths) { + return fieldPaths.some((fieldPath) => hasNonEmptyString(getPathValue(input, fieldPath))); +} + +function hasAcceptedValue(input, fieldPaths, acceptedValue) { + return fieldPaths.some((fieldPath) => getPathValue(input, fieldPath) === acceptedValue); +} + +function describeRequirement(requirement) { + return { + evidenceKey: requirement.evidenceKey, + acceptedFields: [...requirement.acceptedFields], + requiredValue: requirement.requiredValue, + }; +} + +function hasExternalizedCheckpointPath(input) { + return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.externalizedCheckpoint); +} + +function hasConcreteNextAction(input) { + return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.concreteNextAction); +} + +function wantsSilentContinuation(input) { + if (typeof input.silentContinuation === 'boolean') return input.silentContinuation; + if (typeof input.silentCandidate === 'boolean') return input.silentCandidate; + if (typeof input.needsWaiting === 'boolean' && input.needsWaiting) return true; + if (typeof input.needsSubagent === 'boolean' && input.needsSubagent) return true; + return false; +} + +function claimsExecution(input) { + if (typeof input.claimedExecution === 'boolean') return input.claimedExecution; + if (typeof input.executionClaimed === 'boolean') return input.executionClaimed; + if (typeof input.status === 'string' && input.status === 'active') return true; + return false; +} + +function needsOwnerDecision(input) { + if (typeof input.needsOwnerDecision === 'boolean') return input.needsOwnerDecision; + return false; +} + +function usesButtonPath(input) { + return hasAcceptedValue(input, EVIDENCE_FIELDS.buttonPathMode, 'button_path'); +} + +function hasExecutionEvidence(input) { + return EVIDENCE_FIELDS.executionEvidence.some((fieldPath) => { + const value = getPathValue(input, fieldPath); + if (hasNonEmptyString(value)) return true; + if (Array.isArray(value)) return value.length > 0; + if (value && typeof value === 'object') return Object.keys(value).length > 0; + return false; + }); +} + +function hasExplicitAutoChainNextAction(input) { + return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.autoChainNextAction); +} + +function getExplicitAutoChainNextAction(input) { + const nextAction = EVIDENCE_FIELDS.autoChainNextAction + .map((fieldPath) => getPathValue(input, fieldPath)) + .find((value) => hasNonEmptyString(value)); + + return hasNonEmptyString(nextAction) ? nextAction.trim() : ''; +} + +function isExecutableDispatchAction(action) { + if (!hasNonEmptyString(action)) return false; + return /^dispatch_[a-z0-9]+(?:_[a-z0-9]+)*$/i.test(action.trim()); +} + +function getNormalizedDispatchAction(value) { + if (!hasNonEmptyString(value)) return ''; + const normalized = value.trim(); + return isExecutableDispatchAction(normalized) ? normalized : ''; +} + +function getAutoChainDispatchEvidenceMatch(input) { + const nextAction = getExplicitAutoChainNextAction(input); + if (!isExecutableDispatchAction(nextAction)) return { required: false, matched: false }; + + for (const fieldPath of EVIDENCE_FIELDS.autoChainDispatchEvidence) { + const value = getPathValue(input, fieldPath); + if (!value) continue; + + if (hasNonEmptyString(value)) { + const directMatch = getNormalizedDispatchAction(value); + if (directMatch === nextAction) { + return { required: true, matched: true }; + } + continue; + } + + if (typeof value !== 'object' || Array.isArray(value)) continue; + + const candidates = [ + value.action, + value.dispatchedAction, + value.nextAction, + value.autoChainNextAction, + value.requiredNextAction, + value.concreteNextAction, + value.event, + value.type, + value.kind, + value.dispatchType, + value.dispatchAction, + ] + .map((candidate) => getNormalizedDispatchAction(candidate)) + .filter(Boolean); + + const declaresDispatch = [ + value.dispatched === true, + value.wasDispatched === true, + value.didDispatch === true, + value.dispatchEvent === true, + value.event === 'dispatch', + value.type === 'dispatch', + value.kind === 'dispatch', + value.dispatchType === 'dispatch', + ].some(Boolean); + + + if (declaresDispatch && candidates.includes(nextAction)) { + return { required: true, matched: true }; + } + } + + return { required: true, matched: false }; +} + +function hasAutoChainDispatchEvidence(input) { + return getAutoChainDispatchEvidenceMatch(input).matched; +} + +function requiresAutoChainDispatchEvidence(input) { + return getAutoChainDispatchEvidenceMatch(input).required; +} + +function hasProgressEvidence(input) { + return EVIDENCE_FIELDS.progressEvidence.some((fieldPath) => { + const value = getPathValue(input, fieldPath); + if (hasNonEmptyString(value)) return true; + if (Array.isArray(value)) return value.length > 0; + if (value && typeof value === 'object') return Object.keys(value).length > 0; + return false; + }); +} + +function claimsProgression(input) { + const progressionClaim = EVIDENCE_FIELDS.progressionClaim + .map((fieldPath) => getPathValue(input, fieldPath)) + .find((value) => hasNonEmptyString(value)); + + return hasNonEmptyString(progressionClaim); +} + +function claimsProgressionWithoutEvidence(input) { + if (!claimsProgression(input)) return false; + return !hasProgressEvidence(input); +} + +function evaluateGate(input) { + const gateRequired = isLongTask(input); + const reasons = []; + const requiredEvidence = []; + const allowedResponseModes = []; + + if (!gateRequired) { + return { + gateRequired: false, + gateStatus: 'not_applicable', + reasons: ['classification is not long_task'], + requiredEvidence: [], + allowedResponseModes: ['direct_reply'], + }; + } + + let failed = false; + + if (wantsSilentContinuation(input) && !hasExternalizedCheckpointPath(input)) { + failed = true; + reasons.push('silent long-task cannot continue without externalized checkpoint path'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.externalizedCheckpoint)); + allowedResponseModes.push('non_silent_follow_up'); + } + + if (claimsExecution(input) && !hasConcreteNextAction(input)) { + failed = true; + reasons.push('claimed execution requires evidence of a concrete next action'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.concreteNextAction)); + allowedResponseModes.push('checkpoint_only'); + } + + if (needsOwnerDecision(input) && !usesButtonPath(input)) { + failed = true; + reasons.push('owner decision flow must end in button-path, not plain text'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.buttonPathMode)); + allowedResponseModes.push('button_path'); + } + + if (claimsProgressionWithoutEvidence(input)) { + failed = true; + reasons.push('claimed progression without concrete progress evidence is forbidden'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.progressEvidence)); + allowedResponseModes.push('evidence_preserving_follow_up'); + } + + if (requiresAutoChainDispatchEvidence(input) && !hasAutoChainDispatchEvidence(input)) { + failed = true; + reasons.push('explicit auto-chain next action requires dispatched-action evidence'); + requiredEvidence.push(describeRequirement(GATE_REQUIREMENTS.autoChainDispatchEvidence)); + allowedResponseModes.push('dispatch_required'); + } + + if (!failed) { + reasons.push('required long-task gate evidence is present or no gated condition was triggered'); + allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply'); + if (wantsSilentContinuation(input)) allowedResponseModes.push('silent_continuation'); + } + + return { + gateRequired: true, + gateStatus: failed ? 'fail' : 'pass', + reasons, + requiredEvidence, + allowedResponseModes: [...new Set(allowedResponseModes)], + }; +} + +function main() { + const args = parseArgs(process.argv); + const raw = readInput(args.input); + const input = parseJson(raw); + const output = evaluateGate(input); + process.stdout.write(JSON.stringify(output, null, args.pretty ? 2 : 0) + '\n'); +} + +export { evaluateGate }; + +const isDirectRun = process.argv[1] && fs.realpathSync(process.argv[1]) === fs.realpathSync(new URL(import.meta.url)); + +if (isDirectRun) { + try { + main(); + } catch (error) { + fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error'); + } +} diff --git a/scripts/long_task_governor_wrapper.mjs b/scripts/long_task_governor_wrapper.mjs new file mode 100644 index 0000000..ec6c79b --- /dev/null +++ b/scripts/long_task_governor_wrapper.mjs @@ -0,0 +1,261 @@ +#!/usr/bin/env node +import fs from 'fs'; +import path from 'path'; + +function fail(code, message) { + process.stderr.write(`${code}: ${message}\n`); + process.exit(1); +} + +function parseArgs(argv) { + const args = { input: '', pretty: true }; + for (let i = 2; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === '--input') { + const value = argv[i + 1]; + if (!value || value.startsWith('--')) fail('CLI_ERROR', '--input requires a value'); + args.input = value; + i += 1; + } else if (arg === '--compact') { + args.pretty = false; + } else { + fail('CLI_ERROR', `unknown argument: ${arg}`); + } + } + return args; +} + +function readInput(path) { + if (!path || path === '-') return fs.readFileSync(0, 'utf8'); + return fs.readFileSync(path, 'utf8'); +} + +function normalizeRequest(raw) { + let data; + try { + data = JSON.parse(raw); + } catch { + fail('INVALID_JSON', 'input must be valid JSON'); + } + + return { + requestText: data.requestText || '', + hasFilesOrSystems: Boolean(data.hasFilesOrSystems), + needsWaiting: Boolean(data.needsWaiting), + needsSubagent: Boolean(data.needsSubagent), + needsOwnerDecision: Boolean(data.needsOwnerDecision), + canReplyNow: Boolean(data.canReplyNow), + taskName: data.taskName || 'Untitled long-task', + currentStep: data.currentStep || 'Classifying request', + nextStep: data.nextStep || 'Define next actionable step', + nextReportCondition: data.nextReportCondition || 'After next meaningful milestone', + waitingOn: data.waitingOn || 'none', + blocker: data.blocker || 'none', + checkpointTrigger: data.checkpointTrigger || '', + externalizedTrigger: data.externalizedTrigger || '', + triggerKind: data.triggerKind || '', + }; +} + +function inferFromRequestText(input) { + const text = (input.requestText || '').toLowerCase(); + const inferred = { ...input }; + + if (!input.canReplyNow && /\b(can( not|'t)? use|check|inspect|investigate|review|verify|fix|debug|analyze|analyse|compare|deploy|run)\b/.test(text)) { + inferred.hasFilesOrSystems = true; + } + if (!input.needsOwnerDecision && /\b(accept|reject|approve|decision|choose|pick|verdict)\b/.test(text)) { + inferred.needsOwnerDecision = true; + } + if (!input.needsWaiting && /\b(wait|later|after|async|background|follow up|follow-up)\b/.test(text)) { + inferred.needsWaiting = true; + } + if (!input.needsSubagent && /\bsubagent\b/.test(text)) { + inferred.needsSubagent = true; + } + if (!input.checkpointTrigger && inferred.needsSubagent) { + inferred.checkpointTrigger = 'when delegated work returns or the next checkpoint fires'; + } + if (!input.externalizedTrigger && inferred.needsSubagent) { + inferred.externalizedTrigger = 'wrapper-derived checkpoint artifact'; + } + if (!input.triggerKind && inferred.needsSubagent) { + inferred.triggerKind = 'artifact'; + } + + return inferred; +} + +function classify(input) { + const classification = input.canReplyNow && !input.hasFilesOrSystems && !input.needsWaiting && !input.needsSubagent && !input.needsOwnerDecision + ? 'general_chat' + : 'long_task'; + + const silentCandidate = classification === 'long_task' && (input.needsWaiting || input.needsSubagent || Boolean(input.checkpointTrigger)); + const needsCheckpoint = classification === 'long_task'; + + return { + classification, + silentCandidate, + needsOwnerDecision: input.needsOwnerDecision, + needsCheckpoint, + needsSubagent: input.needsSubagent, + }; +} + +function bootstrapTaskState(input, classificationResult) { + if (classificationResult.classification !== 'long_task') return null; + return { + task_name: input.taskName, + status: input.blocker !== 'none' ? 'blocked' : (input.waitingOn !== 'none' ? 'waiting_user' : 'active'), + current_step: input.currentStep, + next_step: input.nextStep, + next_report_condition: input.nextReportCondition, + waiting_on: input.waitingOn, + blocker: input.blocker, + silent: classificationResult.silentCandidate, + }; +} + +function toSlug(value) { + return String(value || '') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 48); +} + +function ensureCheckpointArtifact(externalizedCheckpointPath, input, classificationResult) { + if (classificationResult.classification !== 'long_task') return null; + if (!classificationResult.silentCandidate) return null; + if (!externalizedCheckpointPath) return null; + + const artifactPath = path.resolve(process.cwd(), externalizedCheckpointPath); + const artifact = { + kind: 'long_task_checkpoint', + triggerKind: input.triggerKind || 'artifact', + checkpointTrigger: input.checkpointTrigger || '', + currentStep: input.currentStep || '', + nextStep: input.nextStep || '', + waitingOn: input.waitingOn || '', + blocker: input.blocker || '', + }; + + fs.mkdirSync(path.dirname(artifactPath), { recursive: true }); + fs.writeFileSync(artifactPath, JSON.stringify(artifact, null, 2) + '\n', 'utf8'); + + const stats = fs.statSync(artifactPath); + const readable = fs.readFileSync(artifactPath, 'utf8'); + + return { + absolutePath: artifactPath, + bytes: stats.size, + readable: readable.trim().length > 0, + }; +} + +function buildExternalizedCheckpointPath(input, classificationResult) { + if (classificationResult.classification !== 'long_task') return ''; + if (!classificationResult.silentCandidate) return ''; + if (!input.externalizedTrigger) return ''; + + const taskSeed = [input.currentStep, input.nextStep, input.waitingOn, input.blocker] + .map((value) => toSlug(value)) + .filter(Boolean) + .join('-'); + const stableSeed = taskSeed || 'long-task'; + + return `checkpoints/${stableSeed}.json`; +} + +function buildProgressEvidence(input, classificationResult, externalizedCheckpointPath, checkpointArtifact) { + if (classificationResult.classification !== 'long_task') return null; + if (!classificationResult.silentCandidate) return null; + if (!externalizedCheckpointPath) return null; + if (!checkpointArtifact || checkpointArtifact.readable !== true) return null; + + return { + sessionKey: toSlug([input.currentStep, input.waitingOn, input.nextStep].filter(Boolean).join('-')) || 'long-task-session', + checkpointPath: externalizedCheckpointPath, + verificationResult: `checkpoint artifact readable at ${externalizedCheckpointPath}`, + }; +} + +function validateSilentLaunch(input, classificationResult) { + if (!classificationResult.silentCandidate) { + return { + ok: true, + reason: 'not a silent long-task', + recommendedFallback: 'none', + requiredNextAction: 'proceed_with_normal_long_task_flow', + }; + } + + if (!input.checkpointTrigger) { + return { + ok: false, + reason: 'missing first forced checkpoint trigger', + recommendedFallback: 'non_silent_follow_up', + requiredNextAction: 'define_first_checkpoint_trigger_before_silent_launch', + }; + } + + if (!input.externalizedTrigger) { + return { + ok: false, + reason: 'missing externalized checkpoint path', + recommendedFallback: 'non_silent_follow_up', + requiredNextAction: 'bind_externalized_checkpoint_path_or_abort_silent_launch', + }; + } + + return { + ok: true, + reason: `${input.triggerKind || 'externalized'} trigger is defined`, + recommendedFallback: 'none', + requiredNextAction: 'proceed_with_silent_launch', + }; +} + +function planHandoff(classificationResult) { + if (classificationResult.needsOwnerDecision) return { mode: 'button_path' }; + return { mode: 'direct_reply' }; +} + +function main() { + const args = parseArgs(process.argv); + const raw = readInput(args.input); + const input = inferFromRequestText(normalizeRequest(raw)); + const classificationResult = classify(input); + const taskRecord = bootstrapTaskState(input, classificationResult); + const externalizedCheckpointPath = buildExternalizedCheckpointPath(input, classificationResult); + const checkpointArtifact = ensureCheckpointArtifact(externalizedCheckpointPath, input, classificationResult); + const progressEvidence = buildProgressEvidence(input, classificationResult, externalizedCheckpointPath, checkpointArtifact); + const silentLaunch = validateSilentLaunch(input, classificationResult); + const handoff = planHandoff(classificationResult); + + const output = { + classification: classificationResult.classification, + silentCandidate: classificationResult.silentCandidate, + needsOwnerDecision: classificationResult.needsOwnerDecision, + needsCheckpoint: classificationResult.needsCheckpoint, + needsSubagent: classificationResult.needsSubagent, + taskRecord, + progressEvidence, + externalizedCheckpointPath, + checkpointArtifact, + silentLaunchOk: silentLaunch.ok, + silentLaunchReason: silentLaunch.reason, + recommendedFallback: silentLaunch.recommendedFallback, + requiredNextAction: silentLaunch.requiredNextAction, + handoff, + }; + + process.stdout.write(JSON.stringify(output, null, args.pretty ? 2 : 0) + '\n'); +} + +try { + main(); +} catch (error) { + fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error'); +} diff --git a/scripts/plan_long_task_auto_chain.mjs b/scripts/plan_long_task_auto_chain.mjs new file mode 100644 index 0000000..85320ce --- /dev/null +++ b/scripts/plan_long_task_auto_chain.mjs @@ -0,0 +1,182 @@ +#!/usr/bin/env node +import fs from 'fs'; + +function fail(code, message) { + process.stderr.write(`${code}: ${message}\n`); + process.exit(1); +} + +function parseArgs(argv) { + const args = { input: '', pretty: true }; + for (let i = 2; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === '--input') { + const value = argv[i + 1]; + if (!value || value.startsWith('--')) fail('CLI_ERROR', '--input requires a value'); + args.input = value; + i += 1; + } else if (arg === '--compact') { + args.pretty = false; + } else { + fail('CLI_ERROR', `unknown argument: ${arg}`); + } + } + return args; +} + +function readInput(path) { + if (!path || path === '-') return fs.readFileSync(0, 'utf8'); + return fs.readFileSync(path, 'utf8'); +} + +function parseJson(raw) { + try { + return JSON.parse(raw); + } catch { + fail('INVALID_JSON', 'input must be valid JSON'); + } +} + +function hasNonEmptyString(value) { + return typeof value === 'string' && value.trim().length > 0; +} + +function hasEvidenceObject(value) { + if (!value) return false; + if (hasNonEmptyString(value)) return true; + if (Array.isArray(value)) return value.length > 0; + if (typeof value === 'object') return Object.keys(value).length > 0; + return false; +} + +function normalizedAction(value) { + return hasNonEmptyString(value) ? value.trim() : ''; +} + +function evaluatePlan(input) { + const gateStatus = normalizedAction(input?.gateStatus); + const actorStage = normalizedAction(input?.actorStage); + const requiredNextAction = normalizedAction(input?.requiredNextAction || input?.concreteNextAction || input?.nextStep); + const reviewOutcome = normalizedAction(input?.reviewOutcome).toLowerCase(); + const blocker = normalizedAction(input?.blocker); + const executionEvidence = input?.executionEvidence; + const reviewEvidence = input?.reviewEvidence; + const blockerEvidence = input?.blockerEvidence; + + if (gateStatus !== 'pass') { + return { + plannerStatus: 'blocked_by_gate', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'gateStatus must pass before auto-chain planning can proceed', + requiredEvidence: ['gateStatus=pass'], + autoChainAllowed: false, + }; + } + + if (!requiredNextAction) { + return { + plannerStatus: 'none', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'no concrete next action available for auto-chain planning', + requiredEvidence: ['concreteNextAction'], + autoChainAllowed: false, + }; + } + + if (actorStage === 'implementer_result' && requiredNextAction === 'request_spec_review') { + if (!hasEvidenceObject(executionEvidence)) { + return { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'implementation evidence missing for review-required next action', + requiredEvidence: ['executionEvidence'], + autoChainAllowed: false, + }; + } + + return { + plannerStatus: 'pass', + derivedAction: 'dispatch_spec_review', + dispatchMode: 'dry_run_dispatch', + reason: 'implementation evidence present; derived spec review dispatch in dry-run mode', + requiredEvidence: ['executionEvidence'], + autoChainAllowed: true, + }; + } + + if (actorStage === 'spec_review' && reviewOutcome === 'pass' && requiredNextAction === 'request_code_quality_review') { + if (!hasEvidenceObject(reviewEvidence)) { + return { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'review pass evidence missing for code quality review transition', + requiredEvidence: ['reviewEvidence'], + autoChainAllowed: false, + }; + } + + return { + plannerStatus: 'pass', + derivedAction: 'dispatch_code_quality_review', + dispatchMode: 'dry_run_dispatch', + reason: 'review pass evidence present; derived code quality review dispatch in dry-run mode', + requiredEvidence: ['reviewEvidence'], + autoChainAllowed: true, + }; + } + + if (requiredNextAction === 'fix_review_findings' || hasNonEmptyString(blocker)) { + if (!hasEvidenceObject(blockerEvidence)) { + return { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'blocker evidence missing for retry/fix transition', + requiredEvidence: ['blockerEvidence'], + autoChainAllowed: false, + }; + } + + return { + plannerStatus: 'pass', + derivedAction: 'dispatch_fix_slice', + dispatchMode: 'dry_run_dispatch', + reason: 'blocker evidence present; derived retry/fix dispatch in dry-run mode', + requiredEvidence: ['blockerEvidence'], + autoChainAllowed: true, + }; + } + + return { + plannerStatus: 'none', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + reason: 'no concrete next action matched a dry-run auto-chain transition', + requiredEvidence: ['matchedTransitionEvidence'], + autoChainAllowed: false, + }; +} + +function main() { + const args = parseArgs(process.argv); + const raw = readInput(args.input); + const input = parseJson(raw); + const output = evaluatePlan(input); + process.stdout.write(JSON.stringify(output, null, args.pretty ? 2 : 0) + '\n'); +} + +export { evaluatePlan }; + +const isDirectRun = process.argv[1] && fs.realpathSync(process.argv[1]) === fs.realpathSync(new URL(import.meta.url)); + +if (isDirectRun) { + try { + main(); + } catch (error) { + fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error'); + } +} diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index f275ce1..afdf781 100755 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -61,21 +61,32 @@ async function prepareTempWorkspace() { [plannerPath, path.join(tempWorkspace, 'scripts', 'plan_long_task_auto_chain.mjs')], [continuityGatePath, path.join(tempWorkspace, 'scripts', 'approved_plan_continuity_gate.mjs')], [handlerPath, path.join(tempWorkspace, 'hooks', 'force-recall', 'handler.ts')], - [path.join(repoRoot, 'docs', 'RULEBOOK.md'), path.join(tempWorkspace, 'docs', 'RULEBOOK.md')], - [path.join(repoRoot, 'SOUL.md'), path.join(tempWorkspace, 'SOUL.md')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'index.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'index.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'adapters', 'force-recall.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'adapters', 'force-recall.mjs')], + [path.join(repoRoot, 'plugins', 'continuity', 'src', 'adapters', 'generic-preflight.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'adapters', 'generic-preflight.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'config', 'defaults.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'config', 'defaults.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'config', 'schema.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'config', 'schema.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'continuity', 'evaluator.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'continuity', 'evaluator.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'continuity', 'receipt-validator.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'continuity', 'receipt-validator.mjs')], [path.join(repoRoot, 'plugins', 'continuity', 'src', 'continuity', 'receipt-store.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'continuity', 'receipt-store.mjs')], + [path.join(repoRoot, 'plugins', 'continuity', 'src', 'continuity', 'engine.mjs'), path.join(tempWorkspace, 'plugins', 'continuity', 'src', 'continuity', 'engine.mjs')], ]; for (const [src, dest] of copies) { await fs.copyFile(src, dest); } + await fs.writeFile( + path.join(tempWorkspace, 'docs', 'RULEBOOK.md'), + '# Test Fixture RULEBOOK\n\nMinimal clean-room fixture generated by scripts/test_force_recall_long_task_preflight.mjs.\n', + 'utf8', + ); + await fs.writeFile( + path.join(tempWorkspace, 'SOUL.md'), + '# Test Fixture SOUL\n\nMinimal clean-room fixture generated by scripts/test_force_recall_long_task_preflight.mjs.\n', + 'utf8', + ); + return tempWorkspace; } @@ -370,6 +381,8 @@ async function main() { assert.match(passInjected, /Do not stop at this completed-task boundary/, 'hook pass-path should explicitly forbid stopping at the completed-task boundary'); assert.match(passInjected, /Auto-dispatch the next task in the same approved plan, unless waiting_user, blocked, pending_verification, or high-risk stop applies/, 'hook pass-path should explain the auto-next obligation exceptions'); assert.match(passInjected, /Do not stop at this completed-task boundary/, 'hook pass-path should hard-gate the completed-task boundary'); + assert.match(passInjected, /Do not hand control back to the user with an ordinary progress update while auto-next is still obligatory/, 'hook pass-path should forbid ordinary progress handoff when auto-next obligation is active'); + assert.match(passInjected, /If you cannot prove the next dispatch, convert this into an explicit continuity failure instead of a normal status report/, 'hook pass-path should require failure conversion instead of normal progress reporting'); assert.doesNotMatch(passInjected, /\[APPROVED_PLAN_CONTINUITY_GATE\][\s\S]*status=pass/, 'hook pass-path should not let approved-plan continuity pass on dry-run dispatch alone'); const failInjected = await withPatchedWrapper(buildWrapperScript({ diff --git a/scripts/test_long_task_gate_lock.mjs b/scripts/test_long_task_gate_lock.mjs new file mode 100644 index 0000000..bd02eb9 --- /dev/null +++ b/scripts/test_long_task_gate_lock.mjs @@ -0,0 +1,197 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { spawnSync } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const gateScript = path.join(__dirname, 'long_task_gate_lock.mjs'); + +const scenarios = [ + { + name: 'ordinary chat -> gateStatus=not_applicable', + input: { + classification: 'ordinary_chat', + message: 'just answer directly', + }, + expected: { + gateRequired: false, + gateStatus: 'not_applicable', + reasonIncludes: 'classification is not long_task', + allowedResponseModesIncludes: 'direct_reply', + requiredEvidenceLength: 0, + }, + }, + { + name: 'long-task missing externalized checkpoint -> gateStatus=fail', + input: { + classification: 'long_task', + silentContinuation: true, + }, + expected: { + gateRequired: true, + gateStatus: 'fail', + reasonIncludes: 'silent long-task cannot continue without externalized checkpoint path', + allowedResponseModesIncludes: 'non_silent_follow_up', + requiredEvidenceKey: 'externalizedCheckpoint', + }, + }, + { + name: 'long-task with explicit externalized checkpoint + concrete next action -> gateStatus=pass', + input: { + classification: 'long_task', + silentContinuation: true, + claimedExecution: true, + externalizedCheckpointPath: 'checkpoints/task-42.md', + concreteNextAction: 'Run the queued verifier and report back with output.', + }, + expected: { + gateRequired: true, + gateStatus: 'pass', + reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered', + allowedResponseModesIncludes: 'silent_continuation', + allowedResponseModesIncludesAlso: 'direct_reply', + requiredEvidenceLength: 0, + }, + }, + { + name: 'owner decision without button-path -> gateStatus=fail', + input: { + classification: 'long_task', + needsOwnerDecision: true, + replyClosureMode: 'plain_text', + }, + expected: { + gateRequired: true, + gateStatus: 'fail', + reasonIncludes: 'owner decision flow must end in button-path, not plain text', + allowedResponseModesIncludes: 'button_path', + requiredEvidenceKey: 'buttonPathMode', + }, + }, + { + name: 'owner decision with button-path -> gateStatus=pass', + input: { + classification: 'long_task', + needsOwnerDecision: true, + replyClosureMode: 'button_path', + }, + expected: { + gateRequired: true, + gateStatus: 'pass', + reasonIncludes: 'required long-task gate evidence is present or no gated condition was triggered', + allowedResponseModesIncludes: 'button_path', + requiredEvidenceLength: 0, + }, + }, +]; + +function runGate(input) { + const result = spawnSync(process.execPath, [gateScript, '--compact'], { + input: JSON.stringify(input), + encoding: 'utf8', + }); + + if (result.status !== 0) { + throw new Error(`gate script failed with status=${result.status}: ${result.stderr || result.stdout}`); + } + + let parsed; + try { + parsed = JSON.parse(result.stdout); + } catch (error) { + throw new Error(`gate script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`); + } + + return parsed; +} + +function requireCoreFields(output) { + assert.equal(typeof output.gateRequired, 'boolean', 'gateRequired should be boolean'); + assert.equal(typeof output.gateStatus, 'string', 'gateStatus should be string'); + assert.ok(Array.isArray(output.reasons), 'reasons should be an array'); + assert.ok(Array.isArray(output.requiredEvidence), 'requiredEvidence should be an array'); + assert.ok(Array.isArray(output.allowedResponseModes), 'allowedResponseModes should be an array'); +} + +function assertScenario(output, expected) { + assert.equal(output.gateRequired, expected.gateRequired, 'gateRequired mismatch'); + assert.equal(output.gateStatus, expected.gateStatus, 'gateStatus mismatch'); + + if (expected.reasonIncludes) { + assert.ok( + output.reasons.some((reason) => reason.includes(expected.reasonIncludes)), + `expected reasons to include: ${expected.reasonIncludes}`, + ); + } + + if (expected.allowedResponseModesIncludes) { + assert.ok( + output.allowedResponseModes.includes(expected.allowedResponseModesIncludes), + `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludes}`, + ); + } + + if (expected.allowedResponseModesIncludesAlso) { + assert.ok( + output.allowedResponseModes.includes(expected.allowedResponseModesIncludesAlso), + `expected allowedResponseModes to include: ${expected.allowedResponseModesIncludesAlso}`, + ); + } + + if (typeof expected.requiredEvidenceLength === 'number') { + assert.equal( + output.requiredEvidence.length, + expected.requiredEvidenceLength, + 'requiredEvidence length mismatch', + ); + } + + if (expected.requiredEvidenceKey) { + assert.ok( + output.requiredEvidence.some((entry) => entry && entry.evidenceKey === expected.requiredEvidenceKey), + `expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`, + ); + } +} + +const results = []; +let failed = false; + +for (const scenario of scenarios) { + try { + const output = runGate(scenario.input); + requireCoreFields(output); + assertScenario(output, scenario.expected); + + results.push({ + scenario: scenario.name, + ok: true, + gateRequired: output.gateRequired, + gateStatus: output.gateStatus, + reasons: output.reasons, + requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry.evidenceKey), + allowedResponseModes: output.allowedResponseModes, + assertion: 'pass', + }); + } catch (error) { + failed = true; + results.push({ + scenario: scenario.name, + ok: false, + assertion: 'fail', + error: error instanceof Error ? error.message : String(error), + }); + } +} + +const summary = { + total: results.length, + passed: results.filter((entry) => entry.ok).length, + failed: results.filter((entry) => !entry.ok).length, +}; + +process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`); + +if (failed) process.exit(1); diff --git a/scripts/test_long_task_governor_wrapper.mjs b/scripts/test_long_task_governor_wrapper.mjs new file mode 100644 index 0000000..7c52afe --- /dev/null +++ b/scripts/test_long_task_governor_wrapper.mjs @@ -0,0 +1,179 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import os from 'node:os'; +import { execFileSync, spawnSync } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const repoRoot = path.resolve(__dirname, '..'); +const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs'); + +const fixtures = [ + { + name: 'example', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_example.json'), + assert(output) { + assert.equal(output.classification, 'long_task'); + }, + }, + { + name: 'borderline wrapper inference', + input: { + requestText: 'Inspect the current hook and compare it to the wrapper outputs before replying.', + canReplyNow: false, + }, + assert(output) { + assert.equal(output.classification, 'long_task'); + assert.equal(output.needsCheckpoint, true); + }, + }, + { + name: 'invalid silent', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_invalid_silent_example.json'), + assert(output) { + assert.equal(output.silentLaunchOk, false); + }, + }, + { + name: 'general chat', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_general_chat_example.json'), + assert(output) { + assert.equal(output.classification, 'general_chat'); + }, + }, + { + name: 'non-silent long task', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_non_silent_long_task_example.json'), + assert(output) { + assert.equal(output.classification, 'long_task'); + assert.equal(output.silentCandidate, false); + }, + }, + { + name: 'owner decision', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_silent_owner_decision_example.json'), + assert(output) { + assert.equal(output.handoff.mode, 'button_path'); + }, + }, + { + name: 'subagent wait', + file: path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json'), + assert(output) { + assert.equal(output.silentCandidate, true); + assert.ok(output.progressEvidence && typeof output.progressEvidence === 'object', 'subagent wait: missing progressEvidence'); + assert.equal(typeof output.progressEvidence.sessionKey, 'string', 'subagent wait: missing progressEvidence.sessionKey'); + assert.ok(output.progressEvidence.sessionKey.length > 0, 'subagent wait: empty progressEvidence.sessionKey'); + assert.equal(typeof output.externalizedCheckpointPath, 'string', 'subagent wait: missing externalizedCheckpointPath'); + assert.ok(output.externalizedCheckpointPath.length > 0, 'subagent wait: empty externalizedCheckpointPath'); + assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'subagent wait: progressEvidence must not backfill taskRecord.task_name'); + assert.equal(JSON.stringify(output.progressEvidence).includes('Wait for delegated log survey'), false, 'subagent wait: progressEvidence must not derive from taskRecord.task_name'); + }, + }, +]; + +function runFixture(fixture) { + const args = fixture.file + ? [wrapperPath, '--compact', '--input', fixture.file] + : [wrapperPath, '--compact']; + const options = { + cwd: repoRoot, + encoding: 'utf8', + }; + + if (fixture.input) { + options.input = `${JSON.stringify(fixture.input)}\n`; + } + + const stdout = execFileSync(process.execPath, args, options); + + let output; + try { + output = JSON.parse(stdout); + } catch (error) { + throw new Error(`Fixture \"${fixture.name}\" did not produce valid JSON: ${error.message}\nOutput: ${stdout}`); + } + + assert.ok(output.classification !== undefined, `${fixture.name}: missing classification`); + assert.ok(output.silentCandidate !== undefined, `${fixture.name}: missing silentCandidate`); + assert.ok(output.silentLaunchOk !== undefined, `${fixture.name}: missing silentLaunchOk`); + assert.ok(output.requiredNextAction !== undefined, `${fixture.name}: missing requiredNextAction`); + assert.ok(output.handoff && output.handoff.mode !== undefined, `${fixture.name}: missing handoff.mode`); + + fixture.assert(output); + + return { + name: fixture.name, + output, + }; +} + +function assertErrorCase(name, args, expectedStderr, input) { + const result = spawnSync(process.execPath, [wrapperPath, ...args], { + cwd: repoRoot, + encoding: 'utf8', + input, + }); + + assert.notEqual(result.status, 0, `${name}: expected non-zero exit`); + assert.equal(result.stdout, '', `${name}: expected empty stdout`); + assert.equal(result.stderr.trim(), expectedStderr, `${name}: unexpected stderr`); +} + +function main() { + const results = fixtures.map(runFixture); + + const realismWorkspace = fs.mkdtempSync(path.join(os.tmpdir(), 'wrapper-realism-')); + try { + const realismInput = path.join(repoRoot, 'docs', '_artifacts', 'long_task_governor_wrapper_subagent_wait_example.json'); + const stdout = execFileSync(process.execPath, [wrapperPath, '--compact', '--input', realismInput], { + cwd: realismWorkspace, + encoding: 'utf8', + }); + const output = JSON.parse(stdout); + assert.equal(typeof output.externalizedCheckpointPath, 'string', 'realism: missing externalizedCheckpointPath'); + assert.ok(output.externalizedCheckpointPath.length > 0, 'realism: empty externalizedCheckpointPath'); + const artifactPath = path.join(realismWorkspace, output.externalizedCheckpointPath); + assert.ok(fs.existsSync(artifactPath), `realism: checkpoint artifact missing at ${artifactPath}`); + const artifactBody = fs.readFileSync(artifactPath, 'utf8'); + assert.ok(artifactBody.trim().length > 0, 'realism: checkpoint artifact should be readable and non-empty'); + assert.equal('task_name' in (output.progressEvidence ?? {}), false, 'realism: progressEvidence must not include task_name fallback'); + assert.equal(artifactBody.includes('Wait for delegated log survey'), false, 'realism: checkpoint artifact must not fall back to taskRecord.task_name'); + results.push({ + name: 'real checkpoint artifact', + output: { + classification: output.classification, + silentCandidate: output.silentCandidate, + silentLaunchOk: output.silentLaunchOk, + requiredNextAction: output.requiredNextAction, + handoff: output.handoff, + }, + }); + } finally { + fs.rmSync(realismWorkspace, { recursive: true, force: true }); + } + + assertErrorCase('invalid json', ['--compact'], 'INVALID_JSON: input must be valid JSON', 'not-json\n'); + assertErrorCase('missing input value', ['--input'], 'CLI_ERROR: --input requires a value'); + assertErrorCase('unknown argument', ['--bogus'], 'CLI_ERROR: unknown argument: --bogus'); + + const summary = { + passed: results.length, + fixtures: results.map(({ name, output }) => ({ + name, + classification: output.classification, + silentCandidate: output.silentCandidate, + silentLaunchOk: output.silentLaunchOk, + requiredNextAction: output.requiredNextAction, + handoffMode: output.handoff.mode, + })), + errorCases: 3, + }; + + process.stdout.write(JSON.stringify(summary, null, 2) + '\n'); +} + +main(); diff --git a/scripts/test_plan_long_task_auto_chain.mjs b/scripts/test_plan_long_task_auto_chain.mjs new file mode 100644 index 0000000..5ae5f8c --- /dev/null +++ b/scripts/test_plan_long_task_auto_chain.mjs @@ -0,0 +1,240 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { spawnSync } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const plannerScript = path.join(__dirname, 'plan_long_task_auto_chain.mjs'); + +const scenarios = [ + { + name: 'implementer result with review-required next action -> review dispatch', + input: { + gateStatus: 'pass', + actorStage: 'implementer_result', + requiredNextAction: 'request_spec_review', + executionEvidence: { + modifiedFiles: ['scripts/example.mjs'], + verificationResult: 'tests pass', + }, + }, + expected: { + plannerStatus: 'pass', + derivedAction: 'dispatch_spec_review', + dispatchMode: 'dry_run_dispatch', + autoChainAllowed: true, + reasonIncludes: 'implementation evidence present', + requiredEvidenceIncludes: 'executionEvidence', + }, + }, + { + name: 'spec review PASS -> code quality review dispatch', + input: { + gateStatus: 'pass', + actorStage: 'spec_review', + reviewOutcome: 'pass', + requiredNextAction: 'request_code_quality_review', + reviewEvidence: { + reviewer: 'spec-reviewer', + verdict: 'pass', + }, + }, + expected: { + plannerStatus: 'pass', + derivedAction: 'dispatch_code_quality_review', + dispatchMode: 'dry_run_dispatch', + autoChainAllowed: true, + reasonIncludes: 'review pass evidence present', + requiredEvidenceIncludes: 'reviewEvidence', + }, + }, + { + name: 'explicit blocker -> retry/fix action', + input: { + gateStatus: 'pass', + actorStage: 'review_result', + blocker: 'tests failed in review', + requiredNextAction: 'fix_review_findings', + blockerEvidence: { + reviewer: 'qa-reviewer', + finding: 'tests failed', + }, + }, + expected: { + plannerStatus: 'pass', + derivedAction: 'dispatch_fix_slice', + dispatchMode: 'dry_run_dispatch', + autoChainAllowed: true, + reasonIncludes: 'blocker evidence present', + requiredEvidenceIncludes: 'blockerEvidence', + }, + }, + { + name: 'no concrete next action -> none', + input: { + gateStatus: 'pass', + actorStage: 'implementer_result', + executionEvidence: { + modifiedFiles: ['scripts/example.mjs'], + }, + }, + expected: { + plannerStatus: 'none', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'no concrete next action', + requiredEvidenceIncludes: 'concreteNextAction', + }, + }, + { + name: 'gate fail refuses auto-chain', + input: { + gateStatus: 'fail', + actorStage: 'implementer_result', + requiredNextAction: 'request_spec_review', + executionEvidence: { + modifiedFiles: ['scripts/example.mjs'], + }, + }, + expected: { + plannerStatus: 'blocked_by_gate', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'gateStatus must pass', + requiredEvidenceIncludes: 'gateStatus=pass', + }, + }, + { + name: 'textual review request without implementation evidence -> blocked_by_evidence', + input: { + gateStatus: 'pass', + actorStage: 'implementer_result', + requiredNextAction: 'request_spec_review', + }, + expected: { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'implementation evidence missing', + requiredEvidenceIncludes: 'executionEvidence', + }, + }, + { + name: 'spec review pass without review evidence -> blocked_by_evidence', + input: { + gateStatus: 'pass', + actorStage: 'spec_review', + reviewOutcome: 'pass', + requiredNextAction: 'request_code_quality_review', + }, + expected: { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'review pass evidence missing', + requiredEvidenceIncludes: 'reviewEvidence', + }, + }, + { + name: 'fix slice without blocker evidence -> blocked_by_evidence', + input: { + gateStatus: 'pass', + actorStage: 'review_result', + blocker: 'hook_preflight_blocker', + requiredNextAction: 'fix_review_findings', + }, + expected: { + plannerStatus: 'blocked_by_evidence', + derivedAction: 'none', + dispatchMode: 'no_dispatch', + autoChainAllowed: false, + reasonIncludes: 'blocker evidence missing', + requiredEvidenceIncludes: 'blockerEvidence', + }, + }, +]; + +function runPlanner(input) { + const result = spawnSync(process.execPath, [plannerScript, '--compact'], { + input: JSON.stringify(input), + encoding: 'utf8', + }); + + if (result.status !== 0) { + throw new Error(`planner script failed with status=${result.status}: ${result.stderr || result.stdout}`); + } + + let parsed; + try { + parsed = JSON.parse(result.stdout); + } catch (error) { + throw new Error(`planner script returned invalid JSON: ${error.message}\nstdout=${result.stdout}`); + } + + return parsed; +} + +function requireCoreFields(output) { + assert.equal(typeof output.plannerStatus, 'string', 'plannerStatus should be string'); + assert.equal(typeof output.derivedAction, 'string', 'derivedAction should be string'); + assert.equal(typeof output.dispatchMode, 'string', 'dispatchMode should be string'); + assert.equal(typeof output.reason, 'string', 'reason should be string'); + assert.ok(Array.isArray(output.requiredEvidence), 'requiredEvidence should be an array'); + assert.equal(typeof output.autoChainAllowed, 'boolean', 'autoChainAllowed should be boolean'); +} + +function assertScenario(output, expected) { + assert.equal(output.plannerStatus, expected.plannerStatus, 'plannerStatus mismatch'); + assert.equal(output.derivedAction, expected.derivedAction, 'derivedAction mismatch'); + assert.equal(output.dispatchMode, expected.dispatchMode, 'dispatchMode mismatch'); + assert.equal(output.autoChainAllowed, expected.autoChainAllowed, 'autoChainAllowed mismatch'); + assert.match(output.reason, new RegExp(expected.reasonIncludes.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))); + assert.ok( + output.requiredEvidence.includes(expected.requiredEvidenceIncludes), + `expected requiredEvidence to include: ${expected.requiredEvidenceIncludes}`, + ); +} + +const results = []; +let failed = false; + +for (const scenario of scenarios) { + try { + const output = runPlanner(scenario.input); + requireCoreFields(output); + assertScenario(output, scenario.expected); + results.push({ + scenario: scenario.name, + ok: true, + plannerStatus: output.plannerStatus, + derivedAction: output.derivedAction, + dispatchMode: output.dispatchMode, + autoChainAllowed: output.autoChainAllowed, + reason: output.reason, + requiredEvidence: output.requiredEvidence, + }); + } catch (error) { + failed = true; + results.push({ + scenario: scenario.name, + ok: false, + error: error instanceof Error ? error.message : String(error), + }); + } +} + +const summary = { + total: results.length, + passed: results.filter((entry) => entry.ok).length, + failed: results.filter((entry) => !entry.ok).length, +}; + +process.stdout.write(`${JSON.stringify({ summary, results }, null, 2)}\n`); + +if (failed) process.exit(1);