From 91529166feeecfa4f9a8f83aebd14938c2d74f60 Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 23 Apr 2026 11:47:53 +0800 Subject: [PATCH] feat: inject long-task gate lock into force-recall hook --- hooks/force-recall/handler.ts | 181 +++++++++++++++--- .../test_force_recall_long_task_preflight.mjs | 20 +- 2 files changed, 174 insertions(+), 27 deletions(-) diff --git a/hooks/force-recall/handler.ts b/hooks/force-recall/handler.ts index b64de8a..825cacb 100644 --- a/hooks/force-recall/handler.ts +++ b/hooks/force-recall/handler.ts @@ -5,6 +5,20 @@ import { execFile } from "node:child_process"; import { promisify } from "node:util"; const execFileAsync = promisify(execFile); +const LONG_TASK_WRAPPER_TIMEOUT_MS = 8000; +const LONG_TASK_GATE_LOCK_TIMEOUT_MS = 8000; + +type GateLockResult = { + gateRequired: boolean; + gateStatus: "not_applicable" | "pass" | "fail"; + reasons?: string[]; + requiredEvidence?: Array<{ + evidenceKey?: string; + acceptedFields?: string[]; + requiredValue?: string; + }>; + allowedResponseModes?: string[]; +}; function clamp(s: string, max = 1200): string { if (!s) return s; @@ -22,36 +36,20 @@ async function safeReadText(filePath: string): Promise { } } -async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise { +async function runJsonScript(scriptPath: string, workspaceDir: string, input: Record, timeout: number): Promise { let tempInputPath: string | null = null; try { - const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs"); - const input = { - requestText: (ctx.body ?? ctx.content ?? ctx.bodyForAgent ?? "") as string, - hasFilesOrSystems: false, - needsWaiting: false, - needsSubagent: false, - needsOwnerDecision: false, - canReplyNow: false, - taskName: "Hook preflight classification", - currentStep: "Classifying request at preprocessed hook", - nextStep: "Carry governor recommendation into prompt context", - nextReportCondition: "At next meaningful milestone", - waitingOn: "none", - blocker: "none", - checkpointTrigger: "", - externalizedTrigger: "", - triggerKind: "", - }; - - tempInputPath = path.join(os.tmpdir(), `openclaw-long-task-hook-${process.pid}-${Date.now()}.json`); + tempInputPath = path.join( + os.tmpdir(), + `openclaw-hook-${path.basename(scriptPath, path.extname(scriptPath))}-${process.pid}-${Date.now()}.json`, + ); await fs.writeFile(tempInputPath, JSON.stringify(input), "utf-8"); - const { stdout } = await execFileAsync("node", [wrapperPath, "--compact", "--input", tempInputPath], { + const { stdout } = await execFileAsync("node", [scriptPath, "--compact", "--input", tempInputPath], { cwd: workspaceDir, maxBuffer: 1024 * 1024, - timeout: 8000, + timeout, }); return JSON.parse(stdout); @@ -64,6 +62,75 @@ async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise { + const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs"); + const input = { + requestText: (ctx.body ?? ctx.content ?? ctx.bodyForAgent ?? "") as string, + hasFilesOrSystems: false, + needsWaiting: false, + needsSubagent: false, + needsOwnerDecision: false, + canReplyNow: false, + taskName: "Hook preflight classification", + currentStep: "Classifying request at preprocessed hook", + nextStep: "Carry governor recommendation into prompt context", + nextReportCondition: "At next meaningful milestone", + waitingOn: "none", + blocker: "none", + checkpointTrigger: "", + externalizedTrigger: "", + triggerKind: "", + }; + + return runJsonScript(wrapperPath, workspaceDir, input, LONG_TASK_WRAPPER_TIMEOUT_MS); +} + +function buildGateLockInput(wrapperResult: any): Record { + if (!wrapperResult || wrapperResult.classification !== "long_task") { + return { classification: wrapperResult?.classification ?? "general_chat" }; + } + + const needsOwnerDecision = wrapperResult.needsOwnerDecision === true; + const silentCandidate = wrapperResult.silentCandidate === true; + const requiredNextAction = typeof wrapperResult.requiredNextAction === "string" + ? wrapperResult.requiredNextAction.trim() + : ""; + const hasConcreteExecutionEvidence = Boolean( + requiredNextAction + && ![ + "", + "proceed_with_normal_long_task_flow", + "proceed_with_silent_launch", + "define_first_checkpoint_trigger_before_silent_launch", + "bind_externalized_checkpoint_path_or_abort_silent_launch", + ].includes(requiredNextAction), + ); + const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true + && typeof wrapperResult.taskRecord?.task_name === "string" + && wrapperResult.taskRecord.task_name.trim().length > 0; + const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true; + + return { + classification: wrapperResult.classification, + silentContinuation: silentCandidate, + claimedExecution: true, + needsOwnerDecision, + nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "", + requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "", + concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "", + externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "", + externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "", + handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", + replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", + }; +} + +async function runLongTaskGateLock(workspaceDir: string, wrapperResult: any): Promise { + const gateLockPath = path.join(workspaceDir, "scripts", "long_task_gate_lock.mjs"); + const input = buildGateLockInput(wrapperResult); + return runJsonScript(gateLockPath, workspaceDir, input, LONG_TASK_GATE_LOCK_TIMEOUT_MS); +} + function buildWrapperEnforcement(wrapperResult: any): string[] { const lines = [ "- Treat this as ingress preflight guidance from the wrapper MVP.", @@ -91,6 +158,67 @@ function buildWrapperEnforcement(wrapperResult: any): string[] { return lines; } +function buildWrapperHardGate(wrapperResult: any): string[] { + const lines: string[] = []; + + if (wrapperResult.classification === "long_task") { + lines.push("- HARD_GATE: If you intend to proceed as ordinary chat, you must explicitly justify why long-task governance does not apply."); + } + + if (wrapperResult.handoff?.mode === "button_path") { + lines.push("- HARD_GATE: Do not end this flow with a plain-text choice menu. Use Telegram inline buttons or execute the most reasonable next step directly."); + } + + if (wrapperResult.silentCandidate === true && wrapperResult.silentLaunchOk === false) { + lines.push("- HARD_GATE: Do NOT launch or continue this task in silent mode in its current form."); + lines.push("- HARD_GATE: Before any silent execution, satisfy the required next action or downgrade to non-silent follow-up."); + } + + return lines; +} + +function buildGateLockBlock(gateLockResult: GateLockResult | null): string { + if (!gateLockResult) { + return [ + "[LONG_TASK_GATE_LOCK]", + "gateStatus=degraded", + "gateRequired=unknown", + "- ENFORCEMENT: Gate-lock evaluator unavailable; keep existing long-task safeguards in force.", + "- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.", + "- HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.", + "- HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.", + "[/LONG_TASK_GATE_LOCK]", + "", + ].join("\n"); + } + + const lines = [ + "[LONG_TASK_GATE_LOCK]", + `gateRequired=${gateLockResult.gateRequired}`, + `gateStatus=${gateLockResult.gateStatus}`, + ...(gateLockResult.reasons ?? []).map((reason) => `reason=${reason}`), + ...((gateLockResult.requiredEvidence ?? []).map((requirement) => { + const fields = (requirement.acceptedFields ?? []).join(","); + return `requiredEvidence=${requirement.evidenceKey ?? "unknown"};fields=${fields};requiredValue=${requirement.requiredValue ?? "unknown"}`; + })), + ...((gateLockResult.allowedResponseModes ?? []).map((mode) => `allowedResponseMode=${mode}`)), + "- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.", + "- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.", + "- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.", + ]; + + if (gateLockResult.gateStatus === "fail") { + lines.push("- HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing."); + lines.push("- HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence."); + lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records."); + lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up."); + lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff."); + } + + lines.push("[/LONG_TASK_GATE_LOCK]", ""); + return lines.join("\n"); +} + /** * Force Recall hook handler * @@ -114,8 +242,9 @@ const forceRecall = async (event: any) => { safeReadText(soulPath), runLongTaskWrapper(workspaceDir, ctx), ]); + const gateLockResult = wrapperResult ? await runLongTaskGateLock(workspaceDir, wrapperResult) : null; - if (!rulebook && !soul && !wrapperResult) return; + if (!rulebook && !soul && !wrapperResult && !gateLockResult) return; const wrapperBlock = wrapperResult ? [ @@ -131,6 +260,7 @@ const forceRecall = async (event: any) => { wrapperResult.requiredNextAction ? `requiredNextAction=${wrapperResult.requiredNextAction}` : null, wrapperResult.handoff?.mode ? `handoff.mode=${wrapperResult.handoff.mode}` : null, ...buildWrapperEnforcement(wrapperResult), + ...buildWrapperHardGate(wrapperResult), "[/LONG_TASK_GOVERNOR_PREFLIGHT]", "", ] @@ -138,12 +268,15 @@ const forceRecall = async (event: any) => { .join("\n") : ""; + const gateLockBlock = buildGateLockBlock(gateLockResult); + const recallBlock = [ "[RECALL_GATE] Mandatory recall before ANY technical action/tool use.", "- You MUST consult and follow the key rules from RULEBOOK + SOUL.", "- If you are about to run tools, change configs, modify code, or delegate agents: restate the applicable rules first.", "", wrapperBlock || null, + gateLockBlock, rulebook ? `RULEBOOK (source: ${rulebookPath}):\n${clamp(rulebook, 1200)}` : null, soul ? `SOUL (source: ${soulPath}):\n${clamp(soul, 1200)}` : null, "[/RECALL_GATE]", diff --git a/scripts/test_force_recall_long_task_preflight.mjs b/scripts/test_force_recall_long_task_preflight.mjs index 3b64fec..b5f93d4 100644 --- a/scripts/test_force_recall_long_task_preflight.mjs +++ b/scripts/test_force_recall_long_task_preflight.mjs @@ -9,6 +9,7 @@ const __dirname = path.dirname(new URL(import.meta.url).pathname); const repoRoot = path.resolve(__dirname, '..'); const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts'); const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs'); +const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs'); async function importTsModule(tsPath) { const source = await fs.readFile(tsPath, 'utf8'); @@ -18,7 +19,7 @@ async function importTsModule(tsPath) { } async function main() { - await fs.access(wrapperPath); + await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]); const { default: forceRecall } = await importTsModule(handlerPath); assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function'); @@ -48,7 +49,20 @@ async function main() { 'classification=long_task', 'silentLaunchOk=false', 'handoff.mode=button_path', - 'HARD_GATE:', + '[LONG_TASK_GATE_LOCK]', + 'gateStatus=fail', + 'requiredEvidence=externalizedCheckpoint', + 'requiredEvidence=concreteNextAction', + 'requiredEvidence=buttonPathMode', + 'reason=silent long-task cannot continue without externalized checkpoint path', + 'reason=claimed execution requires evidence of a concrete next action', + 'reason=owner decision flow must end in button-path, not plain text', + 'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.', + 'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.', + 'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.', + 'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.', + 'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.', + 'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.', ]; for (const snippet of expectedSnippets) { @@ -58,7 +72,7 @@ async function main() { const summary = { ok: true, checked: expectedSnippets, - bodyPreview: injected.split('\n').slice(0, 20), + bodyPreview: injected.split('\n').slice(0, 30), }; process.stdout.write(JSON.stringify(summary, null, 2) + '\n');