From 6584bc16a8a34355e96aa1a742c907d9b05c98fc Mon Sep 17 00:00:00 2001 From: Eve Date: Thu, 7 May 2026 17:52:11 +0800 Subject: [PATCH] fix: restore minimal long-task watchdog execution chain --- memory/watchdog-state.json | 22 +- scripts/install_long_task_watchdog_cron.sh | 18 ++ scripts/long_task_watchdog.mjs | 263 ++++++++++++++++++ scripts/test_long_task_watchdog.mjs | 159 +++++++++++ state/cron/long-task-watchdog.cron | 1 + ...-reporting-governance-plugin-watchdog.json | 30 ++ state/long-task-watchdog/README.md | 19 ++ 7 files changed, 511 insertions(+), 1 deletion(-) create mode 100755 scripts/install_long_task_watchdog_cron.sh create mode 100755 scripts/long_task_watchdog.mjs create mode 100755 scripts/test_long_task_watchdog.mjs create mode 100644 state/cron/long-task-watchdog.cron create mode 100644 state/long-task-watchdog/2026-05-07T090000Z-reporting-governance-plugin-watchdog.json create mode 100644 state/long-task-watchdog/README.md diff --git a/memory/watchdog-state.json b/memory/watchdog-state.json index 0b39089..4f845f4 100644 --- a/memory/watchdog-state.json +++ b/memory/watchdog-state.json @@ -1,5 +1,5 @@ { - "version": 7, + "version": 8, "watchdogs": [ { "id": "paperclip-bootstrap-watchdog", @@ -33,6 +33,26 @@ "lastNudgeAt": "2026-04-21T18:33:00+08:00", "escalationPolicy": "nudge-owner-then-report", "notes": "已依 Eric 總管指示關閉 long-task watchdog:recurring cron 已停用,這筆 watchdog 改為 paused,不再自動催辦或回報。" + }, + { + "id": "reporting-governance-plugin-watchdog", + "task": "reporting-governance plugin spec development", + "status": "active", + "ownerSession": "main-telegram-eric", + "ownerSessionKey": "agent:coder:main", + "ownerAgentId": "coder", + "channel": "telegram", + "target": "864811879", + "reportChannel": "telegram", + "reportTarget": "864811879", + "intervalMinutes": 10, + "startedAt": "2026-05-07T16:46:00+08:00", + "lastMilestoneAt": "2026-05-07T16:46:00+08:00", + "lastAlertAt": "2026-05-07T09:00:00.000Z", + "lastObservedActivityAt": "2026-05-07T16:46:00+08:00", + "lastNudgeAt": null, + "escalationPolicy": "nudge-owner-then-report", + "notes": "恢復最小外部巡查鏈:由 long_task_watchdog runner + 每 10 分鐘 cron snippet 產生外部 evidence;若要接回真正 sessions/message 巡查,再由上層執行器接手。" } ] } diff --git a/scripts/install_long_task_watchdog_cron.sh b/scripts/install_long_task_watchdog_cron.sh new file mode 100755 index 0000000..1d1fd03 --- /dev/null +++ b/scripts/install_long_task_watchdog_cron.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)" +CRON_FILE="$ROOT_DIR/state/cron/long-task-watchdog.cron" +LOG_DIR="$ROOT_DIR/state/long-task-watchdog" +RUNNER="$ROOT_DIR/scripts/long_task_watchdog.mjs" +STATE_FILE="$ROOT_DIR/memory/watchdog-state.json" + +mkdir -p "$(dirname "$CRON_FILE")" "$LOG_DIR" + +cat >"$CRON_FILE" <> "$LOG_DIR/cron.log" 2>&1 +EOF + +printf 'Wrote cron snippet: %s\n' "$CRON_FILE" +printf 'To install for current user, run:\n' +printf ' (crontab -l 2>/dev/null; cat "%s") | crontab -\n' "$CRON_FILE" diff --git a/scripts/long_task_watchdog.mjs b/scripts/long_task_watchdog.mjs new file mode 100755 index 0000000..9df559d --- /dev/null +++ b/scripts/long_task_watchdog.mjs @@ -0,0 +1,263 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import path from 'node:path'; +import process from 'node:process'; + +const ROOT_DIR = path.resolve(import.meta.dirname, '..'); +const DEFAULT_STATE_PATH = path.join(ROOT_DIR, 'memory', 'watchdog-state.json'); +const DEFAULT_EVIDENCE_DIR = path.join(ROOT_DIR, 'state', 'long-task-watchdog'); + +function parseArgs(argv) { + const args = { + compact: false, + state: DEFAULT_STATE_PATH, + now: null, + evidenceDir: DEFAULT_EVIDENCE_DIR, + writeState: false, + help: false, + }; + + for (let i = 0; i < argv.length; i += 1) { + const token = argv[i]; + if (token === '--compact') { + args.compact = true; + continue; + } + if (token === '--write-state') { + args.writeState = true; + continue; + } + if (token === '--help' || token === '-h') { + args.help = true; + continue; + } + if (token === '--state') { + args.state = argv[i + 1] ?? args.state; + i += 1; + continue; + } + if (token.startsWith('--state=')) { + args.state = token.slice('--state='.length) || args.state; + continue; + } + if (token === '--now') { + args.now = argv[i + 1] ?? null; + i += 1; + continue; + } + if (token.startsWith('--now=')) { + args.now = token.slice('--now='.length) || null; + continue; + } + if (token === '--evidence-dir') { + args.evidenceDir = argv[i + 1] ?? args.evidenceDir; + i += 1; + continue; + } + if (token.startsWith('--evidence-dir=')) { + args.evidenceDir = token.slice('--evidence-dir='.length) || args.evidenceDir; + continue; + } + } + + return args; +} + +function printHelp() { + process.stdout.write([ + 'Usage: node scripts/long_task_watchdog.mjs [--compact] [--write-state] [--state ] [--now ] [--evidence-dir ]', + '', + 'Minimal file-backed long-task watchdog runner.', + ].join('\n') + '\n'); +} + +function parseJsonFile(filePath) { + const raw = fs.readFileSync(filePath, 'utf8'); + return JSON.parse(raw); +} + +function parseTime(value) { + if (typeof value !== 'string' || value.length === 0) return null; + const timestamp = Date.parse(value); + return Number.isNaN(timestamp) ? null : timestamp; +} + +function toIso(value) { + return new Date(value).toISOString(); +} + +function toSafeName(value) { + return String(value || 'watchdog') + .replace(/[^a-zA-Z0-9._-]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 80) || 'watchdog'; +} + +function evaluateWatchdog(watchdog, nowMs) { + const intervalMinutes = Number.isFinite(watchdog?.intervalMinutes) + ? watchdog.intervalMinutes + : Number.parseInt(String(watchdog?.intervalMinutes ?? '0'), 10); + const intervalMs = intervalMinutes > 0 ? intervalMinutes * 60 * 1000 : 0; + const milestoneMs = parseTime(watchdog?.lastMilestoneAt); + const lastAlertMs = parseTime(watchdog?.lastAlertAt); + const active = watchdog?.status === 'active'; + + if (!active) { + return { + id: watchdog?.id ?? null, + active: false, + overdue: false, + action: 'skip_inactive', + reason: 'watchdog is not active', + }; + } + + if (!intervalMs || milestoneMs === null) { + return { + id: watchdog?.id ?? null, + active: true, + overdue: false, + action: 'invalid_contract', + reason: 'intervalMinutes or lastMilestoneAt is missing/invalid', + }; + } + + const dueAtMs = milestoneMs + intervalMs; + const overdue = nowMs >= dueAtMs; + + if (!overdue) { + return { + id: watchdog?.id ?? null, + active: true, + overdue: false, + action: 'within_interval', + reason: 'last milestone is still within interval', + dueAt: toIso(dueAtMs), + minutesOverdue: 0, + }; + } + + const lastAlertStillFresh = lastAlertMs !== null && lastAlertMs >= dueAtMs; + if (lastAlertStillFresh) { + return { + id: watchdog?.id ?? null, + active: true, + overdue: true, + action: 'already_alerted_this_interval', + reason: 'lastAlertAt already covers current overdue interval', + dueAt: toIso(dueAtMs), + minutesOverdue: Math.floor((nowMs - dueAtMs) / 60000), + }; + } + + return { + id: watchdog?.id ?? null, + active: true, + overdue: true, + action: 'emit_external_evidence', + reason: 'active watchdog is overdue and has not been externally evidenced for this interval', + dueAt: toIso(dueAtMs), + minutesOverdue: Math.floor((nowMs - dueAtMs) / 60000), + }; +} + +function ensureDir(dirPath) { + fs.mkdirSync(dirPath, { recursive: true }); +} + +function writeEvidence(evidenceDir, watchdog, evaluation, nowIso) { + ensureDir(evidenceDir); + const fileName = `${nowIso.replace(/[:]/g, '').replace(/\.\d{3}Z$/, 'Z')}-${toSafeName(watchdog.id)}.json`; + const filePath = path.join(evidenceDir, fileName); + const payload = { + generatedAt: nowIso, + tool: 'long_task_watchdog', + watchdog: { + id: watchdog.id, + task: watchdog.task, + ownerSession: watchdog.ownerSession ?? null, + ownerSessionKey: watchdog.ownerSessionKey ?? null, + reportChannel: watchdog.reportChannel ?? watchdog.channel ?? null, + reportTarget: watchdog.reportTarget ?? watchdog.target ?? null, + intervalMinutes: watchdog.intervalMinutes, + lastMilestoneAt: watchdog.lastMilestoneAt ?? null, + lastAlertAt: watchdog.lastAlertAt ?? null, + }, + evaluation, + nextExpectedExternalAction: [ + 'nudge owner session', + 'report owner-visible checkpoint', + 'or respawn / inspect locally if owner appears stalled', + ], + }; + fs.writeFileSync(filePath, `${JSON.stringify(payload, null, 2)}\n`, 'utf8'); + return filePath; +} + +function main() { + const args = parseArgs(process.argv.slice(2)); + if (args.help) { + printHelp(); + process.exit(0); + } + + const nowMs = args.now ? parseTime(args.now) : Date.now(); + if (nowMs === null) { + process.stderr.write('Invalid --now value\n'); + process.exit(1); + } + const nowIso = toIso(nowMs); + + const state = parseJsonFile(args.state); + const watchdogs = Array.isArray(state.watchdogs) ? state.watchdogs : []; + const evaluations = watchdogs.map((watchdog) => ({ + watchdogId: watchdog?.id ?? null, + ...evaluateWatchdog(watchdog, nowMs), + })); + + const evidenceWrites = []; + const nextWatchdogs = watchdogs.map((watchdog, index) => { + const evaluation = evaluations[index]; + if (evaluation.action !== 'emit_external_evidence') { + return watchdog; + } + const evidencePath = writeEvidence(args.evidenceDir, watchdog, evaluation, nowIso); + evidenceWrites.push({ watchdogId: watchdog.id, path: evidencePath }); + return { + ...watchdog, + lastAlertAt: nowIso, + lastObservedActivityAt: watchdog.lastObservedActivityAt ?? watchdog.lastMilestoneAt ?? null, + lastNudgeAt: watchdog.lastNudgeAt ?? null, + }; + }); + + if (args.writeState) { + const nextState = { + ...state, + watchdogs: nextWatchdogs, + }; + fs.writeFileSync(args.state, `${JSON.stringify(nextState, null, 2)}\n`, 'utf8'); + } + + const response = { + ok: true, + tool: 'long_task_watchdog', + version: 'mvp-v1', + statePath: path.resolve(args.state), + evidenceDir: path.resolve(args.evidenceDir), + now: nowIso, + writeState: args.writeState, + result: { + activeCount: watchdogs.filter((item) => item?.status === 'active').length, + overdueCount: evaluations.filter((item) => item.overdue === true).length, + emittedCount: evidenceWrites.length, + evaluations, + evidenceWrites, + }, + }; + + process.stdout.write(`${JSON.stringify(response, null, args.compact ? 0 : 2)}\n`); +} + +main(); diff --git a/scripts/test_long_task_watchdog.mjs b/scripts/test_long_task_watchdog.mjs new file mode 100755 index 0000000..450f742 --- /dev/null +++ b/scripts/test_long_task_watchdog.mjs @@ -0,0 +1,159 @@ +#!/usr/bin/env node + +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync, readdirSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import path from 'node:path'; +import process from 'node:process'; +import { spawnSync } from 'node:child_process'; + +const ROOT_DIR = path.resolve(import.meta.dirname, '..'); +const WATCHDOG_SCRIPT = path.join(ROOT_DIR, 'scripts', 'long_task_watchdog.mjs'); + +function createFixtureRunner() { + const fixtureRoot = mkdtempSync(path.join(tmpdir(), 'long-task-watchdog-test-')); + const statePath = path.join(fixtureRoot, 'watchdog-state.json'); + const evidenceDir = path.join(fixtureRoot, 'evidence'); + mkdirSync(evidenceDir, { recursive: true }); + + function writeState(content) { + const body = typeof content === 'string' ? content : JSON.stringify(content, null, 2); + writeFileSync(statePath, body); + return statePath; + } + + function run(args = []) { + const result = spawnSync(process.execPath, [WATCHDOG_SCRIPT, '--state', statePath, '--evidence-dir', evidenceDir, ...args], { + cwd: ROOT_DIR, + encoding: 'utf8', + }); + return { + status: result.status, + stdout: result.stdout ?? '', + stderr: result.stderr ?? '', + }; + } + + function readState() { + return JSON.parse(readFileSync(statePath, 'utf8')); + } + + function listEvidence() { + return readdirSync(evidenceDir).sort(); + } + + function cleanup() { + rmSync(fixtureRoot, { recursive: true, force: true }); + } + + return { statePath, evidenceDir, writeState, run, readState, listEvidence, cleanup }; +} + +const tests = []; +function test(name, fn) { tests.push({ name, fn }); } + +function printResult(prefix, name, detail = '') { + process.stdout.write(`${prefix} ${name}${detail ? ` ${detail}` : ''}\n`); +} + +test('inactive watchdogs do not emit evidence', () => { + const runner = createFixtureRunner(); + try { + runner.writeState({ + version: 1, + watchdogs: [ + { + id: 'paused-watchdog', + task: 'paused task', + status: 'paused', + intervalMinutes: 10, + lastMilestoneAt: '2026-05-07T08:00:00.000Z', + }, + ], + }); + + const result = runner.run(['--compact', '--now', '2026-05-07T08:20:00.000Z']); + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.result.emittedCount, 0); + assert.deepEqual(runner.listEvidence(), []); + } finally { + runner.cleanup(); + } +}); + +test('overdue active watchdog emits external evidence and updates lastAlertAt when write-state is enabled', () => { + const runner = createFixtureRunner(); + try { + runner.writeState({ + version: 1, + watchdogs: [ + { + id: 'reporting-governance-plugin-watchdog', + task: 'reporting-governance plugin spec development', + status: 'active', + ownerSessionKey: 'agent:coder:main', + reportChannel: 'telegram', + reportTarget: '864811879', + intervalMinutes: 10, + lastMilestoneAt: '2026-05-07T08:00:00.000Z', + lastAlertAt: null, + }, + ], + }); + + const result = runner.run(['--compact', '--write-state', '--now', '2026-05-07T08:20:00.000Z']); + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.result.emittedCount, 1); + const evidenceFiles = runner.listEvidence(); + assert.equal(evidenceFiles.length, 1); + + const nextState = runner.readState(); + assert.equal(nextState.watchdogs[0].lastAlertAt, '2026-05-07T08:20:00.000Z'); + } finally { + runner.cleanup(); + } +}); + +test('same interval is not alerted twice once lastAlertAt covers the overdue window', () => { + const runner = createFixtureRunner(); + try { + runner.writeState({ + version: 1, + watchdogs: [ + { + id: 'reporting-governance-plugin-watchdog', + task: 'reporting-governance plugin spec development', + status: 'active', + intervalMinutes: 10, + lastMilestoneAt: '2026-05-07T08:00:00.000Z', + lastAlertAt: '2026-05-07T08:12:00.000Z', + }, + ], + }); + + const result = runner.run(['--compact', '--write-state', '--now', '2026-05-07T08:15:00.000Z']); + assert.equal(result.status, 0, result.stderr); + const payload = JSON.parse(result.stdout); + assert.equal(payload.result.emittedCount, 0); + assert.deepEqual(runner.listEvidence(), []); + } finally { + runner.cleanup(); + } +}); + +let failures = 0; +for (const { name, fn } of tests) { + try { + fn(); + printResult('ok', name); + } catch (error) { + failures += 1; + printResult('not ok', name, `- ${error instanceof Error ? error.message : String(error)}`); + } +} + +if (failures > 0) { + process.exit(1); +} diff --git a/state/cron/long-task-watchdog.cron b/state/cron/long-task-watchdog.cron new file mode 100644 index 0000000..c26b798 --- /dev/null +++ b/state/cron/long-task-watchdog.cron @@ -0,0 +1 @@ +*/10 * * * * cd "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin" && /usr/bin/env node "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/scripts/long_task_watchdog.mjs" --write-state --state "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/memory/watchdog-state.json" --evidence-dir "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog" >> "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog/cron.log" 2>&1 diff --git a/state/long-task-watchdog/2026-05-07T090000Z-reporting-governance-plugin-watchdog.json b/state/long-task-watchdog/2026-05-07T090000Z-reporting-governance-plugin-watchdog.json new file mode 100644 index 0000000..0fc39ab --- /dev/null +++ b/state/long-task-watchdog/2026-05-07T090000Z-reporting-governance-plugin-watchdog.json @@ -0,0 +1,30 @@ +{ + "generatedAt": "2026-05-07T09:00:00.000Z", + "tool": "long_task_watchdog", + "watchdog": { + "id": "reporting-governance-plugin-watchdog", + "task": "reporting-governance plugin spec development", + "ownerSession": "main-telegram-eric", + "ownerSessionKey": "agent:coder:main", + "reportChannel": "telegram", + "reportTarget": "864811879", + "intervalMinutes": 10, + "lastMilestoneAt": "2026-05-07T16:46:00+08:00", + "lastAlertAt": null + }, + "evaluation": { + "watchdogId": "reporting-governance-plugin-watchdog", + "id": "reporting-governance-plugin-watchdog", + "active": true, + "overdue": true, + "action": "emit_external_evidence", + "reason": "active watchdog is overdue and has not been externally evidenced for this interval", + "dueAt": "2026-05-07T08:56:00.000Z", + "minutesOverdue": 4 + }, + "nextExpectedExternalAction": [ + "nudge owner session", + "report owner-visible checkpoint", + "or respawn / inspect locally if owner appears stalled" + ] +} diff --git a/state/long-task-watchdog/README.md b/state/long-task-watchdog/README.md new file mode 100644 index 0000000..40b0357 --- /dev/null +++ b/state/long-task-watchdog/README.md @@ -0,0 +1,19 @@ +# Long-task watchdog evidence + +This directory stores file-backed external evidence produced by `scripts/long_task_watchdog.mjs`. + +## Purpose + +The original watchdog chain had state registration but no active external executor. +This directory is the minimal proof surface for the rebuilt execution chain: + +- each overdue run writes a timestamped evidence artifact +- cron can append to `cron.log` +- `memory/watchdog-state.json` records `lastAlertAt` + +## Expected files + +- `cron.log` — append-only stdout/stderr from cron-triggered runs +- `-.json` — overdue evidence artifact per emitted interval + +These artifacts are meant to be machine-verifiable and safe to inspect from the repo worktree.