feat: inject long-task gate lock into force-recall hook

2026-04-23 11:47:53 +08:00
parent ca6174de21
commit 91529166fe
2 changed files with 174 additions and 27 deletions
--- a/hooks/force-recall/handler.ts
+++ b/hooks/force-recall/handler.ts
@@ -5,6 +5,20 @@ import { execFile } from "node:child_process";
 import { promisify } from "node:util";

 const execFileAsync = promisify(execFile);
+const LONG_TASK_WRAPPER_TIMEOUT_MS = 8000; // timeout handed to runJsonScript (and on to execFile) for the governor wrapper script
+const LONG_TASK_GATE_LOCK_TIMEOUT_MS = 8000; // timeout handed to runJsonScript (and on to execFile) for the gate-lock script
+
+type GateLockResult = { // Shape this hook expects from the JSON printed by scripts/long_task_gate_lock.mjs; no runtime validation is visible in this diff
+  gateRequired: boolean; // rendered as `gateRequired=...` by buildGateLockBlock
+  gateStatus: "not_applicable" | "pass" | "fail"; // "fail" makes buildGateLockBlock append its extra HARD_GATE lines
+  reasons?: string[]; // each entry is rendered as one `reason=...` line
+  requiredEvidence?: Array<{ // each entry is rendered as one `requiredEvidence=...;fields=...;requiredValue=...` line
+    evidenceKey?: string; // rendered as "unknown" when absent
+    acceptedFields?: string[]; // joined with "," when rendered; absent renders as an empty list
+    requiredValue?: string; // rendered as "unknown" when absent
+  }>;
+  allowedResponseModes?: string[]; // each entry is rendered as one `allowedResponseMode=...` line
+};

 function clamp(s: string, max = 1200): string {
  if (!s) return s;
@@ -22,36 +36,20 @@ async function safeReadText(filePath: string): Promise<string | null> {
  }
 }

-async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any | null> {
+async function runJsonScript(scriptPath: string, workspaceDir: string, input: Record<string, unknown>, timeout: number): Promise<any | null> {
  let tempInputPath: string | null = null;

  try {
-    const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs");
-    const input = {
-      requestText: (ctx.body ?? ctx.content ?? ctx.bodyForAgent ?? "") as string,
-      hasFilesOrSystems: false,
-      needsWaiting: false,
-      needsSubagent: false,
-      needsOwnerDecision: false,
-      canReplyNow: false,
-      taskName: "Hook preflight classification",
-      currentStep: "Classifying request at preprocessed hook",
-      nextStep: "Carry governor recommendation into prompt context",
-      nextReportCondition: "At next meaningful milestone",
-      waitingOn: "none",
-      blocker: "none",
-      checkpointTrigger: "",
-      externalizedTrigger: "",
-      triggerKind: "",
-    };
-
-    tempInputPath = path.join(os.tmpdir(), `openclaw-long-task-hook-${process.pid}-${Date.now()}.json`);
+    tempInputPath = path.join(
+      os.tmpdir(),
+      `openclaw-hook-${path.basename(scriptPath, path.extname(scriptPath))}-${process.pid}-${Date.now()}.json`,
+    );
    await fs.writeFile(tempInputPath, JSON.stringify(input), "utf-8");

-    const { stdout } = await execFileAsync("node", [wrapperPath, "--compact", "--input", tempInputPath], {
+    const { stdout } = await execFileAsync("node", [scriptPath, "--compact", "--input", tempInputPath], {
      cwd: workspaceDir,
      maxBuffer: 1024 * 1024,
-      timeout: 8000,
+      timeout,
    });

    return JSON.parse(stdout);
@@ -64,6 +62,75 @@ async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any |
  }
 }

+async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any | null> { // Runs scripts/long_task_governor_wrapper.mjs on the request text and resolves to whatever runJsonScript returns
+  const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs");
+  const input = { // fixed preflight payload: requestText is the only field that varies per call
+    requestText: (ctx.body ?? ctx.content ?? ctx.bodyForAgent ?? "") as string, // first non-nullish of body / content / bodyForAgent, else ""
+    hasFilesOrSystems: false,
+    needsWaiting: false,
+    needsSubagent: false,
+    needsOwnerDecision: false,
+    canReplyNow: false,
+    taskName: "Hook preflight classification",
+    currentStep: "Classifying request at preprocessed hook",
+    nextStep: "Carry governor recommendation into prompt context",
+    nextReportCondition: "At next meaningful milestone",
+    waitingOn: "none",
+    blocker: "none",
+    checkpointTrigger: "",
+    externalizedTrigger: "",
+    triggerKind: "",
+  };
+
+  return runJsonScript(wrapperPath, workspaceDir, input, LONG_TASK_WRAPPER_TIMEOUT_MS); // presumably null on failure; runJsonScript's error path is not fully visible here, confirm
+}
+
+function buildGateLockInput(wrapperResult: any): Record<string, unknown> { // Derives the gate-lock script's JSON input from the governor wrapper's result
+  if (!wrapperResult || wrapperResult.classification !== "long_task") { // anything other than a long_task forwards only its classification
+    return { classification: wrapperResult?.classification ?? "general_chat" }; // a missing result or classification becomes "general_chat"
+  }
+
+  const needsOwnerDecision = wrapperResult.needsOwnerDecision === true; // strict check: only a literal true counts
+  const silentCandidate = wrapperResult.silentCandidate === true; // same strict check
+  const requiredNextAction = typeof wrapperResult.requiredNextAction === "string"
+    ? wrapperResult.requiredNextAction.trim()
+    : ""; // non-string values are treated as "no action"
+  const hasConcreteExecutionEvidence = Boolean( // true only when the action is non-empty and is none of the values listed below
+    requiredNextAction
+    && ![
+      "", // NOTE(review): redundant, an empty string is already rejected by the truthiness test above
+      "proceed_with_normal_long_task_flow",
+      "proceed_with_silent_launch",
+      "define_first_checkpoint_trigger_before_silent_launch",
+      "bind_externalized_checkpoint_path_or_abort_silent_launch",
+    ].includes(requiredNextAction),
+  );
+  const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true // needs silentLaunchOk plus a non-blank string taskRecord.task_name
+    && typeof wrapperResult.taskRecord?.task_name === "string"
+    && wrapperResult.taskRecord.task_name.trim().length > 0;
+  const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true; // owner decision and silentLaunchOk must both hold
+
+  return {
+    classification: wrapperResult.classification,
+    silentContinuation: silentCandidate,
+    claimedExecution: true, // always true on the long_task path
+    needsOwnerDecision,
+    nextStep: hasConcreteExecutionEvidence ? requiredNextAction : "", // this and the next two fields always carry the same value
+    requiredNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
+    concreteNextAction: hasConcreteExecutionEvidence ? requiredNextAction : "",
+    externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "", // the trimmed task name is what gets sent as the checkpoint path
+    externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
+    handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", // wrapper's handoff.mode, else "button_path"; "direct_reply" without closure evidence
+    replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", // same expression as handoffMode
+  };
+}
+
+async function runLongTaskGateLock(workspaceDir: string, wrapperResult: any): Promise<GateLockResult | null> { // Runs scripts/long_task_gate_lock.mjs with input derived from the wrapper result
+  const gateLockPath = path.join(workspaceDir, "scripts", "long_task_gate_lock.mjs");
+  const input = buildGateLockInput(wrapperResult);
+  return runJsonScript(gateLockPath, workspaceDir, input, LONG_TASK_GATE_LOCK_TIMEOUT_MS); // NOTE(review): runJsonScript returns parsed JSON typed as any; it is not checked against GateLockResult here
+}
+
 function buildWrapperEnforcement(wrapperResult: any): string[] {
  const lines = [
    "- Treat this as ingress preflight guidance from the wrapper MVP.",
@@ -91,6 +158,67 @@ function buildWrapperEnforcement(wrapperResult: any): string[] {
  return lines;
 }

+function buildWrapperHardGate(wrapperResult: any): string[] { // Collects the HARD_GATE prompt lines implied by the wrapper result; returns [] when no rule applies
+  const lines: string[] = [];
+
+  if (wrapperResult.classification === "long_task") { // long tasks must justify any fallback to ordinary chat
+    lines.push("- HARD_GATE: If you intend to proceed as ordinary chat, you must explicitly justify why long-task governance does not apply.");
+  }
+
+  if (wrapperResult.handoff?.mode === "button_path") { // button-path handoff forbids ending on a plain-text menu
+    lines.push("- HARD_GATE: Do not end this flow with a plain-text choice menu. Use Telegram inline buttons or execute the most reasonable next step directly.");
+  }
+
+  if (wrapperResult.silentCandidate === true && wrapperResult.silentLaunchOk === false) { // strict comparisons: a missing silentLaunchOk does not trigger this rule
+    lines.push("- HARD_GATE: Do NOT launch or continue this task in silent mode in its current form.");
+    lines.push("- HARD_GATE: Before any silent execution, satisfy the required next action or downgrade to non-silent follow-up.");
+  }
+
+  return lines;
+}
+
+function buildGateLockBlock(gateLockResult: GateLockResult | null): string { // Renders the [LONG_TASK_GATE_LOCK] prompt section; a null result yields a fixed "degraded" section
+  if (!gateLockResult) { // no evaluator result: emit the fixed fallback section below
+    return [
+      "[LONG_TASK_GATE_LOCK]",
+      "gateStatus=degraded",
+      "gateRequired=unknown",
+      "- ENFORCEMENT: Gate-lock evaluator unavailable; keep existing long-task safeguards in force.",
+      "- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
+      "- HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.",
+      "- HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.",
+      "[/LONG_TASK_GATE_LOCK]",
+      "", // trailing "" makes the joined text end with a newline
+    ].join("\n");
+  }
+
+  const lines = [ // header fields first, then the three ENFORCEMENT lines that are always present
+    "[LONG_TASK_GATE_LOCK]",
+    `gateRequired=${gateLockResult.gateRequired}`,
+    `gateStatus=${gateLockResult.gateStatus}`,
+    ...(gateLockResult.reasons ?? []).map((reason) => `reason=${reason}`), // NOTE(review): throws if reasons is present but not an array; the value comes from script output, confirm its contract
+    ...((gateLockResult.requiredEvidence ?? []).map((requirement) => { // one line per evidence requirement
+      const fields = (requirement.acceptedFields ?? []).join(","); // absent acceptedFields renders as an empty string
+      return `requiredEvidence=${requirement.evidenceKey ?? "unknown"};fields=${fields};requiredValue=${requirement.requiredValue ?? "unknown"}`;
+    })),
+    ...((gateLockResult.allowedResponseModes ?? []).map((mode) => `allowedResponseMode=${mode}`)), // one line per allowed mode
+    "- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.",
+    "- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
+    "- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
+  ];
+
+  if (gateLockResult.gateStatus === "fail") { // only a failing gate appends the HARD_GATE lines
+    lines.push("- HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.");
+    lines.push("- HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.");
+    lines.push("- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.");
+    lines.push("- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.");
+    lines.push("- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.");
+  }
+
+  lines.push("[/LONG_TASK_GATE_LOCK]", ""); // closing tag plus "" so the joined text ends with a newline
+  return lines.join("\n");
+}
+
 /**
 * Force Recall hook handler
 *
@@ -114,8 +242,9 @@ const forceRecall = async (event: any) => {
    safeReadText(soulPath),
    runLongTaskWrapper(workspaceDir, ctx),
  ]);
+  const gateLockResult = wrapperResult ? await runLongTaskGateLock(workspaceDir, wrapperResult) : null;

-  if (!rulebook && !soul && !wrapperResult) return;
+  if (!rulebook && !soul && !wrapperResult && !gateLockResult) return;

  const wrapperBlock = wrapperResult
    ? [
@@ -131,6 +260,7 @@ const forceRecall = async (event: any) => {
        wrapperResult.requiredNextAction ? `requiredNextAction=${wrapperResult.requiredNextAction}` : null,
        wrapperResult.handoff?.mode ? `handoff.mode=${wrapperResult.handoff.mode}` : null,
        ...buildWrapperEnforcement(wrapperResult),
+        ...buildWrapperHardGate(wrapperResult),
        "[/LONG_TASK_GOVERNOR_PREFLIGHT]",
        "",
      ]
@@ -138,12 +268,15 @@ const forceRecall = async (event: any) => {
        .join("\n")
    : "";

+  const gateLockBlock = buildGateLockBlock(gateLockResult);
+
  const recallBlock = [
    "[RECALL_GATE] Mandatory recall before ANY technical action/tool use.",
    "- You MUST consult and follow the key rules from RULEBOOK + SOUL.",
    "- If you are about to run tools, change configs, modify code, or delegate agents: restate the applicable rules first.",
    "",
    wrapperBlock || null,
+    gateLockBlock,
    rulebook ? `RULEBOOK (source: ${rulebookPath}):\n${clamp(rulebook, 1200)}` : null,
    soul ? `SOUL (source: ${soulPath}):\n${clamp(soul, 1200)}` : null,
    "[/RECALL_GATE]",
--- a/scripts/test_force_recall_long_task_preflight.mjs
+++ b/scripts/test_force_recall_long_task_preflight.mjs
@@ -9,6 +9,7 @@ const __dirname = path.dirname(new URL(import.meta.url).pathname);
 const repoRoot = path.resolve(__dirname, '..');
 const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
 const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
+const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs'); // gate-lock script; main() checks it is accessible before importing the handler

 async function importTsModule(tsPath) {
  const source = await fs.readFile(tsPath, 'utf8');
@@ -18,7 +19,7 @@ async function importTsModule(tsPath) {
 }

 async function main() {
-  await fs.access(wrapperPath);
+  await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]);
  const { default: forceRecall } = await importTsModule(handlerPath);
  assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');

@@ -48,7 +49,20 @@ async function main() {
    'classification=long_task',
    'silentLaunchOk=false',
    'handoff.mode=button_path',
-    'HARD_GATE:',
+    '[LONG_TASK_GATE_LOCK]',
+    'gateStatus=fail',
+    'requiredEvidence=externalizedCheckpoint',
+    'requiredEvidence=concreteNextAction',
+    'requiredEvidence=buttonPathMode',
+    'reason=silent long-task cannot continue without externalized checkpoint path',
+    'reason=claimed execution requires evidence of a concrete next action',
+    'reason=owner decision flow must end in button-path, not plain text',
+    'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
+    'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
+    'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
+    'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
+    'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',
+    'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.',
  ];

  for (const snippet of expectedSnippets) {
@@ -58,7 +72,7 @@ async function main() {
  const summary = {
    ok: true,
    checked: expectedSnippets,
-    bodyPreview: injected.split('\n').slice(0, 20),
+    bodyPreview: injected.split('\n').slice(0, 30),
  };

  process.stdout.write(JSON.stringify(summary, null, 2) + '\n');