diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index a1c7eb4..3b22beb 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -98,6 +98,33 @@ function buildProgressEvidence(wrapperResult: any): Record | nu return Object.keys(progressEvidence).length > 0 ? progressEvidence : null; } +function shouldClaimProgression(wrapperResult: any, progressEvidence: Record | null): boolean { + if (!wrapperResult || wrapperResult.classification !== "long_task") return false; + if (progressEvidence && Object.keys(progressEvidence).length > 0) return true; + + const requiredNextAction = typeof wrapperResult.requiredNextAction === "string" + ? wrapperResult.requiredNextAction.trim() + : ""; + const progressingActionPrefixes = [ + "dispatch_", + "handoff_", + "launch_", + "resume_", + "continue_", + "queue_", + "schedule_", + "run_", + "start_", + "spawn_", + ]; + + if (requiredNextAction && progressingActionPrefixes.some((prefix) => requiredNextAction.startsWith(prefix))) { + return true; + } + + return wrapperResult.silentLaunchOk === true; +} + function buildGateLockInput(wrapperResult: any): Record { if (!wrapperResult || wrapperResult.classification !== "long_task") { return { classification: wrapperResult?.classification ?? "general_chat" }; @@ -124,16 +151,16 @@ function buildGateLockInput(wrapperResult: any): Record { concreteNextAction: requiredNextAction, } : null; - const progressEvidenceReason = progressEvidence - ? "" - : "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result"; + const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence) + ? "already progressing to the next step in background" + : ""; + const progressEvidenceReason = claimedProgression && !progressEvidence + ? "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result" + : ""; const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true && typeof wrapperResult.taskRecord?.task_name === "string" && wrapperResult.taskRecord.task_name.trim().length > 0; const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true; - const claimedProgression = wrapperResult.classification === "long_task" - ? "already progressing to the next step in background" - : ""; return { classification: wrapperResult.classification, diff --git a/scripts/long_task_gate_lock.mjs b/scripts/long_task_gate_lock.mjs index 2f1abff..86700f1 100644 --- a/scripts/long_task_gate_lock.mjs +++ b/scripts/long_task_gate_lock.mjs @@ -190,13 +190,16 @@ function hasProgressEvidence(input) { }); } -function claimsProgressionWithoutEvidence(input) { +function claimsProgression(input) { const progressionClaim = EVIDENCE_FIELDS.progressionClaim .map((fieldPath) => getPathValue(input, fieldPath)) .find((value) => hasNonEmptyString(value)); - if (!hasNonEmptyString(progressionClaim)) return false; + return hasNonEmptyString(progressionClaim); +} +function claimsProgressionWithoutEvidence(input) { + if (!claimsProgression(input)) return false; return !hasProgressEvidence(input); } @@ -269,8 +272,14 @@ function main() { process.stdout.write(JSON.stringify(output, null, args.pretty ? 2 : 0) + '\n'); } -try { - main(); -} catch (error) { - fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error'); +export { evaluateGate }; + +const isDirectRun = process.argv[1] && fs.realpathSync(process.argv[1]) === fs.realpathSync(new URL(import.meta.url)); + +if (isDirectRun) { + try { + main(); + } catch (error) { + fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error'); + } } diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs old mode 100644 new mode 100755 index 969aadd..f9abc8f --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -40,6 +40,20 @@ async function runScenario(forceRecall, requestText) { return injected; } +async function withPatchedWrapper(tempContent, callback) { + const originalWrapper = await fs.readFile(wrapperPath, 'utf8'); + await fs.writeFile(wrapperPath, tempContent, 'utf8'); + try { + return await callback(); + } finally { + await fs.writeFile(wrapperPath, originalWrapper, 'utf8'); + } +} + +function buildWrapperScript(wrapperResult) { + return `#!/usr/bin/env node\nprocess.stdout.write(JSON.stringify(${JSON.stringify(wrapperResult)}, null, 0) + "\\n");\n`; +} + async function main() { await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]); const { default: forceRecall } = await importTsModule(handlerPath); @@ -66,21 +80,95 @@ async function main() { 'reason=silent long-task cannot continue without externalized checkpoint path', 'reason=claimed execution requires evidence of a concrete next action', 'reason=owner decision flow must end in button-path, not plain text', - 'reason=claimed progression without concrete progress evidence is forbidden', - 'requiredEvidence=progressEvidence', - 'requiredValue=sessionKey, runId, modified_files, verification result, or equivalent concrete progress evidence', 'ENFORCEMENT: Hook input should include progressEvidence (or equivalent concrete fields) whenever a progression claim is present.', 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', - 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete progress evidence.', - 'HARD_GATE: If a progression claim exists, the hook input must supply progressEvidence (or equivalent concrete fields) before the claim can pass gate.', - 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to a sessionKey, runId, modified_files record, verification result, actual tool execution, file changes, emitted messages, or checkpoint records.', + 'HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.', 'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.', 'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.', ]; + const unexpectedSnippets = [ + 'reason=claimed progression without concrete progress evidence is forbidden', + 'requiredEvidence=progressEvidence', + ]; + for (const snippet of expectedSnippets) { assert.match(injected, new RegExp(escapeRegex(snippet)), `missing snippet: ${snippet}`); } + for (const snippet of unexpectedSnippets) { + assert.doesNotMatch(injected, new RegExp(escapeRegex(snippet)), `unexpected snippet present: ${snippet}`); + } + + const { evaluateGate } = await import(pathToFileURL(gateLockPath).href + `?t=${Date.now()}`); + assert.equal(typeof evaluateGate, 'function', 'long_task_gate_lock should export evaluateGate for direct tests'); + + const passResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + progressionClaim: 'already progressing to the next step in background', + progressEvidence: { sessionKey: 'task-123' }, + }); + assert.equal(passResult.gateStatus, 'pass', 'pass-path should pass with concrete progressEvidence'); + + const failResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + progressionClaim: 'already progressing to the next step in background', + executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' }, + }); + assert.equal(failResult.gateStatus, 'fail', 'fail-path should fail when progressionClaim lacks progressEvidence'); + assert.match(JSON.stringify(failResult), /progressEvidence/, 'fail-path should require progressEvidence'); + + const neutralResult = evaluateGate({ + classification: 'long_task', + claimedExecution: true, + concreteNextAction: 'dispatch_follow_up_subagent', + executionEvidence: { concreteNextAction: 'dispatch_follow_up_subagent' }, + }); + assert.equal(neutralResult.gateStatus, 'pass', 'neutral-path should pass when there is no progression claim'); + assert.doesNotMatch(JSON.stringify(neutralResult), /progressEvidence/, 'neutral-path should not require progressEvidence'); + + const passInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: true, + needsCheckpoint: true, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: true, + silentLaunchReason: 'checkpoint established', + requiredNextAction: 'dispatch_follow_up_subagent', + taskRecord: { task_name: 'task-123' }, + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, requestText)); + assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence'); + + const failInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: false, + needsCheckpoint: false, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: false, + requiredNextAction: 'dispatch_follow_up_subagent', + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, requestText)); + assert.match(failInjected, /gateStatus=fail/, 'hook fail-path should fail when wrapper claims progression without progressEvidence'); + assert.match(failInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook fail-path should mention missing progress evidence'); + + const neutralInjected = await withPatchedWrapper(buildWrapperScript({ + classification: 'long_task', + silentCandidate: false, + needsCheckpoint: false, + needsSubagent: false, + needsOwnerDecision: false, + silentLaunchOk: false, + requiredNextAction: 'summarize_findings_for_reply', + handoff: { mode: 'direct_reply' }, + }), async () => runScenario(forceRecall, requestText)); + assert.match(neutralInjected, /gateStatus=pass/, 'hook neutral-path should pass when wrapper does not claim progression'); + assert.doesNotMatch(neutralInjected, /reason=claimed progression without concrete progress evidence is forbidden/, 'hook neutral-path should not fail on missing progress evidence without a progression claim'); const originalGateLock = await fs.readFile(gateLockPath, 'utf8'); const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'force-recall-gate-lock-')); @@ -109,14 +197,15 @@ async function main() { assert.match(degradedInjected, new RegExp(escapeRegex(snippet)), `missing degraded snippet: ${snippet}`); } - const summary = { + process.stdout.write(JSON.stringify({ ok: true, - checked: expectedSnippets, - degradedChecked: degradedExpectedSnippets, + gatePaths: { + pass: passResult.gateStatus, + fail: failResult.gateStatus, + neutral: neutralResult.gateStatus, + }, bodyPreview: injected.split('\n').slice(0, 35), - }; - - process.stdout.write(JSON.stringify(summary, null, 2) + '\n'); + }, null, 2) + '\n'); } main().catch((error) => {