feat: inject long-task gate lock into force-recall hook

2026-04-23 11:47:53 +08:00
parent ca6174de21
commit 91529166fe
2 changed files with 174 additions and 27 deletions
--- a/hooks/force-recall/handler.ts
+++ b/hooks/force-recall/handler.ts
@@ -5,6 +5,20 @@ import { execFile } from "node:child_process";
 import { promisify } from "node:util";
 // Promise-returning form of child_process.execFile, so script runs can be awaited.
 const execFileAsync = promisify(execFile);
 // Timeout passed to runJsonScript when running the long-task governor wrapper script (milliseconds, per the name).
 const LONG_TASK_WRAPPER_TIMEOUT_MS = 8000;
 // Timeout passed to runJsonScript when running the long-task gate-lock script (milliseconds, per the name).
 const LONG_TASK_GATE_LOCK_TIMEOUT_MS = 8000;
 /**
 * One evidence requirement reported by the gate-lock script.
 * Every field is optional; buildGateLockBlock substitutes "unknown" / an empty
 * field list when a value is missing.
 */
 type GateLockEvidenceRequirement = {
  evidenceKey?: string;
  acceptedFields?: string[];
  requiredValue?: string;
 };
 /**
 * Shape the hook expects from the gate-lock script's JSON output.
 * NOTE(review): the parsed JSON is not validated against this type at runtime.
 */
 type GateLockResult = {
  gateRequired: boolean;
  gateStatus: "not_applicable" | "pass" | "fail";
  reasons?: string[];
  requiredEvidence?: GateLockEvidenceRequirement[];
  allowedResponseModes?: string[];
 };
 function clamp(s: string, max = 1200): string {
  if (!s) return s;
@@ -22,36 +36,20 @@ async function safeReadText(filePath: string): Promise<string | null> {
  }
 }
-async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any | null> {
+async function runJsonScript(scriptPath: string, workspaceDir: string, input: Record<string, unknown>, timeout: number): Promise<any | null> {
  let tempInputPath: string | null = null;
  try {
-    const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs");
+    tempInputPath = path.join(
-    const input = {
+      os.tmpdir(),
-      requestText: (ctx.body ?? ctx.content ?? ctx.bodyForAgent ?? "") as string,
+      `openclaw-hook-${path.basename(scriptPath, path.extname(scriptPath))}-${process.pid}-${Date.now()}.json`,
-      hasFilesOrSystems: false,
+    );
      needsWaiting: false,
      needsSubagent: false,
      needsOwnerDecision: false,
      canReplyNow: false,
      taskName: "Hook preflight classification",
      currentStep: "Classifying request at preprocessed hook",
      nextStep: "Carry governor recommendation into prompt context",
      nextReportCondition: "At next meaningful milestone",
      waitingOn: "none",
      blocker: "none",
      checkpointTrigger: "",
      externalizedTrigger: "",
      triggerKind: "",
    };
    tempInputPath = path.join(os.tmpdir(), `openclaw-long-task-hook-${process.pid}-${Date.now()}.json`);
    await fs.writeFile(tempInputPath, JSON.stringify(input), "utf-8");
-    const { stdout } = await execFileAsync("node", [wrapperPath, "--compact", "--input", tempInputPath], {
+    const { stdout } = await execFileAsync("node", [scriptPath, "--compact", "--input", tempInputPath], {
      cwd: workspaceDir,
      maxBuffer: 1024 * 1024,
-      timeout: 8000,
+      timeout,
    });
    return JSON.parse(stdout);
@@ -64,6 +62,75 @@ async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any |
  }
 }
 /**
 * Run the long-task governor wrapper script against the incoming request text.
 *
 * Builds a fixed "hook preflight" input record (all decision flags false,
 * placeholder task/step descriptions) and hands it to runJsonScript together
 * with the wrapper timeout.
 *
 * @param workspaceDir Workspace root; the wrapper is resolved at scripts/long_task_governor_wrapper.mjs beneath it.
 * @param ctx Hook context; the request text is the first non-nullish of body, content, bodyForAgent.
 * @returns Whatever runJsonScript resolves to for the wrapper script.
 */
 async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any | null> {
  const wrapperPath = path.join(workspaceDir, "scripts", "long_task_governor_wrapper.mjs");
  // Optional chaining: these reads are no longer inside runJsonScript's try block,
  // so a missing ctx would otherwise throw a TypeError and reject the caller's promise
  // instead of falling back to an empty request text.
  const requestText = (ctx?.body ?? ctx?.content ?? ctx?.bodyForAgent ?? "") as string;
  const input = {
    requestText,
    hasFilesOrSystems: false,
    needsWaiting: false,
    needsSubagent: false,
    needsOwnerDecision: false,
    canReplyNow: false,
    taskName: "Hook preflight classification",
    currentStep: "Classifying request at preprocessed hook",
    nextStep: "Carry governor recommendation into prompt context",
    nextReportCondition: "At next meaningful milestone",
    waitingOn: "none",
    blocker: "none",
    checkpointTrigger: "",
    externalizedTrigger: "",
    triggerKind: "",
  };
  return runJsonScript(wrapperPath, workspaceDir, input, LONG_TASK_WRAPPER_TIMEOUT_MS);
 }
 /**
 * Translate the governor wrapper's result into the input record for the gate-lock script.
 *
 * Anything that is not classified as "long_task" (including a missing result)
 * yields only a classification, defaulting to "general_chat". For long tasks the
 * record carries evidence fields, which are left empty unless the wrapper result
 * actually supports them:
 * - the next action counts only when it is a non-empty string that is not one of
 *   the wrapper's generic placeholder actions;
 * - the checkpoint counts only when silentLaunchOk is true and taskRecord.task_name is non-blank;
 * - button-path closure counts only when an owner decision is needed and silentLaunchOk is true.
 *
 * @param wrapperResult Parsed wrapper output, or a nullish value when unavailable.
 * @returns Plain record to be serialized as the gate-lock script's input.
 */
 function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
  const classification = wrapperResult?.classification;
  if (classification !== "long_task") {
    return { classification: classification ?? "general_chat" };
  }

  // Generic wrapper actions that do not count as evidence of concrete execution.
  const placeholderActions = [
    "proceed_with_normal_long_task_flow",
    "proceed_with_silent_launch",
    "define_first_checkpoint_trigger_before_silent_launch",
    "bind_externalized_checkpoint_path_or_abort_silent_launch",
  ];

  let concreteAction = "";
  if (typeof wrapperResult.requiredNextAction === "string") {
    const trimmedAction = wrapperResult.requiredNextAction.trim();
    if (trimmedAction !== "" && !placeholderActions.includes(trimmedAction)) {
      concreteAction = trimmedAction;
    }
  }

  const ownerDecisionNeeded = wrapperResult.needsOwnerDecision === true;
  const silentLaunchApproved = wrapperResult.silentLaunchOk === true;

  const rawTaskName = wrapperResult.taskRecord?.task_name;
  const trimmedTaskName = typeof rawTaskName === "string" ? rawTaskName.trim() : "";
  const checkpointProven = silentLaunchApproved && trimmedTaskName.length > 0;

  let closureMode = "direct_reply";
  if (ownerDecisionNeeded && silentLaunchApproved) {
    closureMode = wrapperResult.handoff?.mode ?? "button_path";
  }

  return {
    classification,
    silentContinuation: wrapperResult.silentCandidate === true,
    claimedExecution: true,
    needsOwnerDecision: ownerDecisionNeeded,
    nextStep: concreteAction,
    requiredNextAction: concreteAction,
    concreteNextAction: concreteAction,
    externalizedCheckpointPath: checkpointProven ? trimmedTaskName : "",
    externalizedTrigger: checkpointProven ? "hook-preflight-checkpoint" : "",
    handoffMode: closureMode,
    replyClosureMode: closureMode,
  };
 }
 /**
 * Run the gate-lock script on input derived from the governor wrapper's result.
 *
 * @param workspaceDir Workspace root; the script is resolved at scripts/long_task_gate_lock.mjs beneath it.
 * @param wrapperResult Parsed wrapper output, converted via buildGateLockInput.
 * @returns Whatever runJsonScript resolves to for the gate-lock script.
 */
 async function runLongTaskGateLock(workspaceDir: string, wrapperResult: any): Promise<GateLockResult | null> {
  return runJsonScript(
    path.join(workspaceDir, "scripts", "long_task_gate_lock.mjs"),
    workspaceDir,
    buildGateLockInput(wrapperResult),
    LONG_TASK_GATE_LOCK_TIMEOUT_MS,
  );
 }
 function buildWrapperEnforcement(wrapperResult: any): string[] {
  const lines = [
    "- Treat this as ingress preflight guidance from the wrapper MVP.",
@@ -91,6 +158,67 @@ function buildWrapperEnforcement(wrapperResult: any): string[] {
  return lines;
 }
 /**
 * Build the HARD_GATE prompt lines implied by the governor wrapper's result.
 *
 * Lines are emitted in a fixed order: long-task justification, button-path
 * closure, then the two silent-mode prohibitions (only when the task is a silent
 * candidate whose silent launch was explicitly rejected).
 *
 * @param wrapperResult Parsed wrapper output; must be non-nullish.
 * @returns The applicable HARD_GATE lines, possibly empty.
 */
 function buildWrapperHardGate(wrapperResult: any): string[] {
  const isLongTask = wrapperResult.classification === "long_task";
  const usesButtonPath = wrapperResult.handoff?.mode === "button_path";
  const silentLaunchRejected = wrapperResult.silentCandidate === true && wrapperResult.silentLaunchOk === false;

  return [
    ...(isLongTask
      ? ["- HARD_GATE: If you intend to proceed as ordinary chat, you must explicitly justify why long-task governance does not apply."]
      : []),
    ...(usesButtonPath
      ? ["- HARD_GATE: Do not end this flow with a plain-text choice menu. Use Telegram inline buttons or execute the most reasonable next step directly."]
      : []),
    ...(silentLaunchRejected
      ? [
          "- HARD_GATE: Do NOT launch or continue this task in silent mode in its current form.",
          "- HARD_GATE: Before any silent execution, satisfy the required next action or downgrade to non-silent follow-up.",
        ]
      : []),
  ];
 }
 /**
 * Render the [LONG_TASK_GATE_LOCK] prompt section for a gate-lock result.
 *
 * With no result, a "degraded" block is produced that keeps the safeguards in
 * force. Otherwise the block lists the gate fields, one line per reason,
 * evidence requirement and allowed response mode, followed by the standing
 * ENFORCEMENT lines; the HARD_GATE lines are appended only when gateStatus is "fail".
 *
 * @param gateLockResult Gate-lock output, or null when the evaluator was unavailable.
 * @returns The block text, newline-joined and ending with a trailing newline.
 */
 function buildGateLockBlock(gateLockResult: GateLockResult | null): string {
  const openTag = "[LONG_TASK_GATE_LOCK]";
  const closeTag = "[/LONG_TASK_GATE_LOCK]";
  // Shared by the degraded and the regular block.
  const noUnprovenProgress = "- ENFORCEMENT: Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.";

  if (!gateLockResult) {
    const degraded = [
      openTag,
      "gateStatus=degraded",
      "gateRequired=unknown",
      "- ENFORCEMENT: Gate-lock evaluator unavailable; keep existing long-task safeguards in force.",
      noUnprovenProgress,
      "- HARD_GATE: Evaluator unavailable is not permission to claim silent continuation or next-task progression without verifiable evidence.",
      "- HARD_GATE: Fall back to a non-silent, evidence-preserving follow-up if you cannot prove checkpoint state or concrete execution.",
      closeTag,
      "",
    ];
    return degraded.join("\n");
  }

  const statusLines = [
    `gateRequired=${gateLockResult.gateRequired}`,
    `gateStatus=${gateLockResult.gateStatus}`,
  ];
  const reasonLines = (gateLockResult.reasons ?? []).map((reason) => `reason=${reason}`);
  const evidenceLines = (gateLockResult.requiredEvidence ?? []).map((requirement) => {
    const key = requirement.evidenceKey ?? "unknown";
    const fields = (requirement.acceptedFields ?? []).join(",");
    const value = requirement.requiredValue ?? "unknown";
    return `requiredEvidence=${key};fields=${fields};requiredValue=${value}`;
  });
  const modeLines = (gateLockResult.allowedResponseModes ?? []).map((mode) => `allowedResponseMode=${mode}`);

  const enforcementLines = [
    noUnprovenProgress,
    "- ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.",
    "- ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.",
  ];

  // Hard gates are only injected when the evaluator reported a failure.
  const hardGateLines = gateLockResult.gateStatus === "fail"
    ? [
        "- HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.",
        "- HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.",
        "- HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.",
        "- HARD_GATE: If required evidence is missing, ask for/produce the checkpoint or downgrade to a non-silent, evidence-preserving follow-up.",
        "- HARD_GATE: If owner decision is involved, do not replace button-path closure with plain-text handoff.",
      ]
    : [];

  return [
    openTag,
    ...statusLines,
    ...reasonLines,
    ...evidenceLines,
    ...modeLines,
    ...enforcementLines,
    ...hardGateLines,
    closeTag,
    "",
  ].join("\n");
 }
 /**
 * Force Recall hook handler
 *
@@ -114,8 +242,9 @@ const forceRecall = async (event: any) => {
    safeReadText(soulPath),
    runLongTaskWrapper(workspaceDir, ctx),
  ]);
  const gateLockResult = wrapperResult ? await runLongTaskGateLock(workspaceDir, wrapperResult) : null;
-  if (!rulebook && !soul && !wrapperResult) return;
+  if (!rulebook && !soul && !wrapperResult && !gateLockResult) return;
  const wrapperBlock = wrapperResult
    ? [
@@ -131,6 +260,7 @@ const forceRecall = async (event: any) => {
        wrapperResult.requiredNextAction ? `requiredNextAction=${wrapperResult.requiredNextAction}` : null,
        wrapperResult.handoff?.mode ? `handoff.mode=${wrapperResult.handoff.mode}` : null,
        ...buildWrapperEnforcement(wrapperResult),
        ...buildWrapperHardGate(wrapperResult),
        "[/LONG_TASK_GOVERNOR_PREFLIGHT]",
        "",
      ]
@@ -138,12 +268,15 @@ const forceRecall = async (event: any) => {
        .join("\n")
    : "";
  const gateLockBlock = buildGateLockBlock(gateLockResult);
  const recallBlock = [
    "[RECALL_GATE] Mandatory recall before ANY technical action/tool use.",
    "- You MUST consult and follow the key rules from RULEBOOK + SOUL.",
    "- If you are about to run tools, change configs, modify code, or delegate agents: restate the applicable rules first.",
    "",
    wrapperBlock || null,
    gateLockBlock,
    rulebook ? `RULEBOOK (source: ${rulebookPath}):\n${clamp(rulebook, 1200)}` : null,
    soul ? `SOUL (source: ${soulPath}):\n${clamp(soul, 1200)}` : null,
    "[/RECALL_GATE]",
--- a/scripts/test_force_recall_long_task_preflight.mjs
+++ b/scripts/test_force_recall_long_task_preflight.mjs
@@ -9,6 +9,7 @@ const __dirname = path.dirname(new URL(import.meta.url).pathname);
 const repoRoot = path.resolve(__dirname, '..');
 const handlerPath = path.join(repoRoot, 'hooks', 'force-recall', 'handler.ts');
 const wrapperPath = path.join(repoRoot, 'scripts', 'long_task_governor_wrapper.mjs');
 const gateLockPath = path.join(repoRoot, 'scripts', 'long_task_gate_lock.mjs');
 async function importTsModule(tsPath) {
  const source = await fs.readFile(tsPath, 'utf8');
@@ -18,7 +19,7 @@ async function importTsModule(tsPath) {
 }
 async function main() {
-  await fs.access(wrapperPath);
+  await Promise.all([fs.access(wrapperPath), fs.access(gateLockPath)]);
  const { default: forceRecall } = await importTsModule(handlerPath);
  assert.equal(typeof forceRecall, 'function', 'force-recall handler should export default function');
@@ -48,7 +49,20 @@ async function main() {
    'classification=long_task',
    'silentLaunchOk=false',
    'handoff.mode=button_path',
-    'HARD_GATE:',
+    '[LONG_TASK_GATE_LOCK]',
    'gateStatus=fail',
    'requiredEvidence=externalizedCheckpoint',
    'requiredEvidence=concreteNextAction',
    'requiredEvidence=buttonPathMode',
    'reason=silent long-task cannot continue without externalized checkpoint path',
    'reason=claimed execution requires evidence of a concrete next action',
    'reason=owner decision flow must end in button-path, not plain text',
    'ENFORCEMENT: Forbidden path: plain-text handoff that pretends the long task is already continuing without an externalized checkpoint.',
    'ENFORCEMENT: Forbidden path: stating you have already entered the next task/step when the record only contains planning language and no concrete execution evidence.',
    'HARD_GATE: Block any plain-text handoff or silent-continuation claim when externalized checkpoint evidence is missing.',
    'HARD_GATE: Block any reply path that says you already moved into the next task or are advancing the next step without concrete execution evidence.',
    'HARD_GATE: Do not say you are already on the next task, already dispatched follow-up work, or already progressing in background unless you can point to actual tool execution, file changes, emitted messages, or checkpoint records.',
    'Do not claim you have progressed into the next task or are already pushing the next step unless you have concrete evidence such as actual dispatch, tool calls, file changes, or a persisted checkpoint artifact.',
  ];
  for (const snippet of expectedSnippets) {
@@ -58,7 +72,7 @@ async function main() {
  const summary = {
    ok: true,
    checked: expectedSnippets,
-    bodyPreview: injected.split('\n').slice(0, 20),
+    bodyPreview: injected.split('\n').slice(0, 30),
  };
  process.stdout.write(JSON.stringify(summary, null, 2) + '\n');