fix: restore minimal long-task watchdog execution chain

This commit is contained in:
Eve
2026-05-07 17:52:11 +08:00
parent 7763e1e462
commit 6584bc16a8
7 changed files with 511 additions and 1 deletions

View File

@@ -1,5 +1,5 @@
{
"version": 7,
"version": 8,
"watchdogs": [
{
"id": "paperclip-bootstrap-watchdog",
@@ -33,6 +33,26 @@
"lastNudgeAt": "2026-04-21T18:33:00+08:00",
"escalationPolicy": "nudge-owner-then-report",
"notes": "已依 Eric 總管指示關閉 long-task watchdog;recurring cron 已停用,這筆 watchdog 改為 paused,不再自動催辦或回報。"
},
{
"id": "reporting-governance-plugin-watchdog",
"task": "reporting-governance plugin spec development",
"status": "active",
"ownerSession": "main-telegram-eric",
"ownerSessionKey": "agent:coder:main",
"ownerAgentId": "coder",
"channel": "telegram",
"target": "864811879",
"reportChannel": "telegram",
"reportTarget": "864811879",
"intervalMinutes": 10,
"startedAt": "2026-05-07T16:46:00+08:00",
"lastMilestoneAt": "2026-05-07T16:46:00+08:00",
"lastAlertAt": "2026-05-07T09:00:00.000Z",
"lastObservedActivityAt": "2026-05-07T16:46:00+08:00",
"lastNudgeAt": null,
"escalationPolicy": "nudge-owner-then-report",
"notes": "恢復最小外部巡查鏈:由 long_task_watchdog runner + 每 10 分鐘 cron snippet 產生外部 evidence;若要接回真正 sessions/message 巡查,再由上層執行器接手。"
}
]
}

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Generate the cron snippet that runs the long-task watchdog runner every
# 10 minutes, then print the command that installs it for the current user.
# Nothing is installed automatically: this script only writes the snippet
# file and creates the directories it needs.
set -euo pipefail

# Repository root: the parent of the directory that contains this script.
ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
CRON_FILE="$ROOT_DIR/state/cron/long-task-watchdog.cron"
LOG_DIR="$ROOT_DIR/state/long-task-watchdog"
RUNNER="$ROOT_DIR/scripts/long_task_watchdog.mjs"
STATE_FILE="$ROOT_DIR/memory/watchdog-state.json"

mkdir -p "$(dirname "$CRON_FILE")" "$LOG_DIR"

# The heredoc delimiter is intentionally unquoted so the absolute paths above
# are expanded into the snippet at generation time.
cat >"$CRON_FILE" <<EOF
*/10 * * * * cd "$ROOT_DIR" && /usr/bin/env node "$RUNNER" --write-state --state "$STATE_FILE" --evidence-dir "$LOG_DIR" >> "$LOG_DIR/cron.log" 2>&1
EOF

printf 'Wrote cron snippet: %s\n' "$CRON_FILE"
printf 'To install for current user, run:\n'
# Filter out any entry that already references this runner before appending
# the snippet, so running the install command again does not leave duplicate
# cron entries behind.
printf ' (crontab -l 2>/dev/null | grep -vF -- "%s"; cat "%s") | crontab -\n' "$RUNNER" "$CRON_FILE"

263
scripts/long_task_watchdog.mjs Executable file
View File

@@ -0,0 +1,263 @@
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
const ROOT_DIR = path.resolve(import.meta.dirname, '..');
const DEFAULT_STATE_PATH = path.join(ROOT_DIR, 'memory', 'watchdog-state.json');
const DEFAULT_EVIDENCE_DIR = path.join(ROOT_DIR, 'state', 'long-task-watchdog');
/**
 * Parse the runner's command-line tokens into an options object.
 *
 * Boolean switches: `--compact`, `--write-state`, `--help` / `-h`.
 * Valued options: `--state`, `--now`, `--evidence-dir`, each accepted either
 * as `--flag value` or `--flag=value`. Unrecognised tokens are ignored.
 *
 * @param {string[]} argv - Tokens after the script name.
 * @returns {{compact: boolean, state: string, now: (string|null),
 *   evidenceDir: string, writeState: boolean, help: boolean}} Parsed options.
 */
function parseArgs(argv) {
  const options = {
    compact: false,
    state: DEFAULT_STATE_PATH,
    now: null,
    evidenceDir: DEFAULT_EVIDENCE_DIR,
    writeState: false,
    help: false,
  };

  const switches = new Map([
    ['--compact', 'compact'],
    ['--write-state', 'writeState'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);

  // keepCurrent: a missing/empty value leaves the option at its current
  // value; otherwise (only --now) a missing/empty value resets it to null.
  const valued = [
    { flag: '--state', key: 'state', keepCurrent: true },
    { flag: '--now', key: 'now', keepCurrent: false },
    { flag: '--evidence-dir', key: 'evidenceDir', keepCurrent: true },
  ];

  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;

    const switchKey = switches.get(token);
    if (switchKey !== undefined) {
      options[switchKey] = true;
      continue;
    }

    for (const { flag, key, keepCurrent } of valued) {
      const fallback = keepCurrent ? options[key] : null;
      if (token === flag) {
        // Space-separated form: the following token is consumed as the value
        // (even when absent, in which case the fallback applies).
        options[key] = argv[cursor] ?? fallback;
        cursor += 1;
        break;
      }
      const inlinePrefix = `${flag}=`;
      if (token.startsWith(inlinePrefix)) {
        options[key] = token.slice(inlinePrefix.length) || fallback;
        break;
      }
    }
  }

  return options;
}
/**
 * Write the usage summary for this runner to stdout.
 */
function printHelp() {
  const usage = 'Usage: node scripts/long_task_watchdog.mjs [--compact] [--write-state] [--state <path>] [--now <iso>] [--evidence-dir <path>]';
  const summary = 'Minimal file-backed long-task watchdog runner.';
  // Usage line, a blank separator line, then the summary.
  const lines = [usage, '', summary];
  process.stdout.write(`${lines.join('\n')}\n`);
}
/**
 * Read a UTF-8 file synchronously and parse its contents as JSON.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {*} The parsed JSON value.
 * @throws Whatever `fs.readFileSync` or `JSON.parse` throws (missing file,
 *   malformed JSON, ...).
 */
function parseJsonFile(filePath) {
  return JSON.parse(fs.readFileSync(filePath, { encoding: 'utf8' }));
}
/**
 * Convert a timestamp string to epoch milliseconds.
 *
 * @param {*} value - Candidate timestamp; only non-empty strings are parsed.
 * @returns {(number|null)} Epoch milliseconds, or null when the value is not
 *   a non-empty string or `Date.parse` cannot interpret it.
 */
function parseTime(value) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (!isNonEmptyString) {
    return null;
  }
  // Date.parse yields either a finite time value or NaN.
  const epochMs = Date.parse(value);
  return Number.isFinite(epochMs) ? epochMs : null;
}
/**
 * Format a value accepted by the `Date` constructor as an ISO-8601 UTC string.
 *
 * @param {(number|string|Date)} value - Instant to format.
 * @returns {string} The `toISOString()` representation.
 * @throws {RangeError} When the value does not denote a valid date.
 */
function toIso(value) {
  const instant = new Date(value);
  return instant.toISOString();
}
/**
 * Reduce a value to a token usable inside a file name.
 *
 * Runs of characters other than letters, digits, `.`, `_` and `-` collapse to
 * a single `-`, leading/trailing dashes are removed, and the result is cut to
 * 80 characters. Falsy input or an empty result yields `'watchdog'`.
 *
 * @param {*} value - Value to sanitise (typically a watchdog id).
 * @returns {string} The sanitised, non-empty name.
 */
function toSafeName(value) {
  const fallback = 'watchdog';
  const source = value ? String(value) : fallback;
  const dashed = source.replace(/[^a-zA-Z0-9._-]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  const clipped = trimmed.slice(0, 80);
  return clipped === '' ? fallback : clipped;
}
/**
 * Decide what the runner should do for one watchdog entry at a given instant.
 *
 * Possible `action` values:
 * - `skip_inactive`: `status` is not `'active'`.
 * - `invalid_contract`: `intervalMinutes` is not a positive number or
 *   `lastMilestoneAt` cannot be parsed.
 * - `within_interval`: `nowMs` is before `lastMilestoneAt + interval`.
 * - `already_alerted_this_interval`: overdue, but `lastAlertAt` is at or after
 *   the due time.
 * - `emit_external_evidence`: overdue and not yet alerted.
 *
 * @param {object} watchdog - Watchdog entry from the state file.
 * @param {number} nowMs - Evaluation instant in epoch milliseconds.
 * @returns {object} `{ id, active, overdue, action, reason }`, plus `dueAt`
 *   (ISO string) and `minutesOverdue` once the contract is valid.
 */
function evaluateWatchdog(watchdog, nowMs) {
  const rawInterval = watchdog?.intervalMinutes;
  // Numbers are used as-is; anything else goes through base-10 integer parsing.
  const minutes = Number.isFinite(rawInterval)
    ? rawInterval
    : Number.parseInt(String(rawInterval ?? '0'), 10);
  const intervalMs = minutes > 0 ? minutes * 60 * 1000 : 0;
  const milestoneMs = parseTime(watchdog?.lastMilestoneAt);
  const lastAlertMs = parseTime(watchdog?.lastAlertAt);

  const id = watchdog?.id ?? null;
  // Shared result builder; the key order is what ends up in the JSON output.
  const verdict = (active, overdue, action, reason, timing = {}) => ({
    id,
    active,
    overdue,
    action,
    reason,
    ...timing,
  });

  if (watchdog?.status !== 'active') {
    return verdict(false, false, 'skip_inactive', 'watchdog is not active');
  }

  const contractUsable = intervalMs > 0 && milestoneMs !== null;
  if (!contractUsable) {
    return verdict(true, false, 'invalid_contract', 'intervalMinutes or lastMilestoneAt is missing/invalid');
  }

  const dueAtMs = milestoneMs + intervalMs;
  const dueAt = toIso(dueAtMs);
  const isOverdue = nowMs >= dueAtMs;
  if (!isOverdue) {
    return verdict(true, false, 'within_interval', 'last milestone is still within interval', {
      dueAt,
      minutesOverdue: 0,
    });
  }

  const lateness = { dueAt, minutesOverdue: Math.floor((nowMs - dueAtMs) / 60000) };
  // An alert recorded at or after the due time already covers this overdue window.
  const alertCoversWindow = lastAlertMs !== null && lastAlertMs >= dueAtMs;
  if (alertCoversWindow) {
    return verdict(
      true,
      true,
      'already_alerted_this_interval',
      'lastAlertAt already covers current overdue interval',
      lateness,
    );
  }

  return verdict(
    true,
    true,
    'emit_external_evidence',
    'active watchdog is overdue and has not been externally evidenced for this interval',
    lateness,
  );
}
/**
 * Create a directory, including missing parents; an existing directory is fine.
 *
 * @param {string} dirPath - Directory to create.
 */
function ensureDir(dirPath) {
  const options = { recursive: true };
  fs.mkdirSync(dirPath, options);
}
/**
 * Write one evidence artifact describing an overdue watchdog.
 *
 * The file is named `<timestamp>-<safe watchdog id>.json`, where the timestamp
 * is `nowIso` with colons removed and the millisecond part dropped. The
 * evidence directory is created when missing.
 *
 * @param {string} evidenceDir - Directory that receives the artifact.
 * @param {object} watchdog - Watchdog entry being reported.
 * @param {object} evaluation - Evaluation result embedded verbatim.
 * @param {string} nowIso - ISO timestamp of this run.
 * @returns {string} Path of the file that was written.
 */
function writeEvidence(evidenceDir, watchdog, evaluation, nowIso) {
  ensureDir(evidenceDir);

  const stamp = nowIso.replace(/[:]/g, '').replace(/\.\d{3}Z$/, 'Z');
  const targetPath = path.join(evidenceDir, `${stamp}-${toSafeName(watchdog.id)}.json`);

  // Snapshot of the watchdog fields relevant to whoever acts on the evidence;
  // report channel/target fall back to the plain channel/target fields.
  const subject = {
    id: watchdog.id,
    task: watchdog.task,
    ownerSession: watchdog.ownerSession ?? null,
    ownerSessionKey: watchdog.ownerSessionKey ?? null,
    reportChannel: watchdog.reportChannel ?? watchdog.channel ?? null,
    reportTarget: watchdog.reportTarget ?? watchdog.target ?? null,
    intervalMinutes: watchdog.intervalMinutes,
    lastMilestoneAt: watchdog.lastMilestoneAt ?? null,
    lastAlertAt: watchdog.lastAlertAt ?? null,
  };
  const followUps = [
    'nudge owner session',
    'report owner-visible checkpoint',
    'or respawn / inspect locally if owner appears stalled',
  ];

  const document = {
    generatedAt: nowIso,
    tool: 'long_task_watchdog',
    watchdog: subject,
    evaluation,
    nextExpectedExternalAction: followUps,
  };
  const serialized = `${JSON.stringify(document, null, 2)}\n`;
  fs.writeFileSync(targetPath, serialized, 'utf8');
  return targetPath;
}
/**
 * CLI entry point for the long-task watchdog runner.
 *
 * Flow: parse arguments, resolve "now" (from `--now` or the system clock),
 * load the state file, evaluate every watchdog, write one evidence artifact
 * per watchdog whose action is `emit_external_evidence`, optionally persist
 * the updated state, and print a JSON report to stdout.
 *
 * Exit codes: 0 on success or `--help`; 1 for an invalid `--now` value or a
 * state file that cannot be read/parsed (message on stderr).
 *
 * Note that evidence artifacts are written regardless of `--write-state`;
 * the flag only controls whether `lastAlertAt` is persisted.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    printHelp();
    process.exit(0);
  }

  const nowMs = args.now ? parseTime(args.now) : Date.now();
  if (nowMs === null) {
    process.stderr.write('Invalid --now value\n');
    process.exit(1);
  }
  const nowIso = toIso(nowMs);

  let state;
  try {
    state = parseJsonFile(args.state);
  } catch (error) {
    // Report a one-line diagnostic instead of a raw stack trace; the exit
    // code matches what an uncaught exception would have produced.
    const detail = error instanceof Error ? error.message : String(error);
    process.stderr.write(`Failed to read watchdog state ${args.state}: ${detail}\n`);
    process.exit(1);
  }

  // Optional chaining keeps a non-object state (e.g. JSON `null`) from
  // throwing; it is treated as "no watchdogs".
  const watchdogs = Array.isArray(state?.watchdogs) ? state.watchdogs : [];
  const evaluations = watchdogs.map((watchdog) => ({
    watchdogId: watchdog?.id ?? null,
    ...evaluateWatchdog(watchdog, nowMs),
  }));

  const evidenceWrites = [];
  const nextWatchdogs = watchdogs.map((watchdog, index) => {
    const evaluation = evaluations[index];
    if (evaluation.action !== 'emit_external_evidence') {
      return watchdog;
    }
    const evidencePath = writeEvidence(args.evidenceDir, watchdog, evaluation, nowIso);
    evidenceWrites.push({ watchdogId: watchdog.id, path: evidencePath });
    return {
      ...watchdog,
      lastAlertAt: nowIso,
      lastObservedActivityAt: watchdog.lastObservedActivityAt ?? watchdog.lastMilestoneAt ?? null,
      lastNudgeAt: watchdog.lastNudgeAt ?? null,
    };
  });

  // Only touch the state file when an alert was actually recorded. The file
  // is shared with whoever maintains the watchdog entries, so rewriting it on
  // every run with unchanged content would only widen the window in which a
  // concurrent update gets overwritten by this read-modify-write cycle.
  if (args.writeState && evidenceWrites.length > 0) {
    const nextState = {
      ...state,
      watchdogs: nextWatchdogs,
    };
    fs.writeFileSync(args.state, `${JSON.stringify(nextState, null, 2)}\n`, 'utf8');
  }

  const response = {
    ok: true,
    tool: 'long_task_watchdog',
    version: 'mvp-v1',
    statePath: path.resolve(args.state),
    evidenceDir: path.resolve(args.evidenceDir),
    now: nowIso,
    writeState: args.writeState,
    result: {
      activeCount: watchdogs.filter((item) => item?.status === 'active').length,
      overdueCount: evaluations.filter((item) => item.overdue === true).length,
      emittedCount: evidenceWrites.length,
      evaluations,
      evidenceWrites,
    },
  };
  process.stdout.write(`${JSON.stringify(response, null, args.compact ? 0 : 2)}\n`);
}

main();

View File

@@ -0,0 +1,159 @@
#!/usr/bin/env node
import assert from 'node:assert/strict';
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync, readdirSync } from 'node:fs';
import { tmpdir } from 'node:os';
import path from 'node:path';
import process from 'node:process';
import { spawnSync } from 'node:child_process';
const ROOT_DIR = path.resolve(import.meta.dirname, '..');
const WATCHDOG_SCRIPT = path.join(ROOT_DIR, 'scripts', 'long_task_watchdog.mjs');
/**
 * Create an isolated fixture (temp directory with a state file path and an
 * evidence directory) plus helpers to drive the watchdog script against it.
 *
 * @returns {{statePath: string, evidenceDir: string, writeState: Function,
 *   run: Function, readState: Function, listEvidence: Function,
 *   cleanup: Function}} Fixture paths and helpers. Call `cleanup()` when done.
 */
function createFixtureRunner() {
  const fixtureRoot = mkdtempSync(path.join(tmpdir(), 'long-task-watchdog-test-'));
  const statePath = path.join(fixtureRoot, 'watchdog-state.json');
  const evidenceDir = path.join(fixtureRoot, 'evidence');
  mkdirSync(evidenceDir, { recursive: true });

  return {
    statePath,
    evidenceDir,

    // Accepts raw text (written verbatim) or a value serialised as pretty JSON.
    writeState(content) {
      const isRawText = typeof content === 'string';
      writeFileSync(statePath, isRawText ? content : JSON.stringify(content, null, 2));
      return statePath;
    },

    // Runs the script with the fixture's --state/--evidence-dir followed by
    // any extra arguments, and returns its exit status and captured output.
    run(args = []) {
      const argv = [WATCHDOG_SCRIPT, '--state', statePath, '--evidence-dir', evidenceDir, ...args];
      const { status, stdout, stderr } = spawnSync(process.execPath, argv, {
        cwd: ROOT_DIR,
        encoding: 'utf8',
      });
      return {
        status,
        stdout: stdout ?? '',
        stderr: stderr ?? '',
      };
    },

    readState() {
      const text = readFileSync(statePath, 'utf8');
      return JSON.parse(text);
    },

    listEvidence() {
      const names = readdirSync(evidenceDir);
      return names.sort();
    },

    cleanup() {
      rmSync(fixtureRoot, { recursive: true, force: true });
    },
  };
}
// Registered test cases, kept in registration order.
const tests = [];

/**
 * Register a test case; it is executed later by the runner loop.
 *
 * @param {string} name - Label printed next to the result.
 * @param {Function} fn - Test body; throwing marks the test as failed.
 */
function test(name, fn) {
  const entry = { name, fn };
  tests.push(entry);
}
/**
 * Print one result line to stdout: `<prefix> <name>[ <detail>]`.
 *
 * @param {string} prefix - Status marker, e.g. `ok` or `not ok`.
 * @param {string} name - Test name.
 * @param {string} [detail=''] - Optional trailing detail; omitted when empty.
 */
function printResult(prefix, name, detail = '') {
  const suffix = detail ? ` ${detail}` : '';
  process.stdout.write(`${prefix} ${name}${suffix}\n`);
}
// A paused watchdog must be skipped even when its milestone is long overdue.
test('inactive watchdogs do not emit evidence', () => {
  const runner = createFixtureRunner();
  try {
    runner.writeState({
      version: 1,
      watchdogs: [
        {
          id: 'paused-watchdog',
          task: 'paused task',
          status: 'paused',
          intervalMinutes: 10,
          lastMilestoneAt: '2026-05-07T08:00:00.000Z',
        },
      ],
    });
    // "now" is 20 minutes after the milestone (interval is 10), so only the
    // paused status can explain the absence of evidence.
    const result = runner.run(['--compact', '--now', '2026-05-07T08:20:00.000Z']);
    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.result.activeCount, 0);
    assert.equal(payload.result.emittedCount, 0);
    // Pin the branch taken, not just the lack of output.
    assert.equal(payload.result.evaluations[0].action, 'skip_inactive');
    assert.deepEqual(runner.listEvidence(), []);
  } finally {
    runner.cleanup();
  }
});
// An active watchdog past its due time with no prior alert must produce one
// evidence file and, with --write-state, record the alert time in the state.
test('overdue active watchdog emits external evidence and updates lastAlertAt when write-state is enabled', () => {
  const runner = createFixtureRunner();
  try {
    runner.writeState({
      version: 1,
      watchdogs: [
        {
          id: 'reporting-governance-plugin-watchdog',
          task: 'reporting-governance plugin spec development',
          status: 'active',
          ownerSessionKey: 'agent:coder:main',
          reportChannel: 'telegram',
          reportTarget: '864811879',
          intervalMinutes: 10,
          lastMilestoneAt: '2026-05-07T08:00:00.000Z',
          lastAlertAt: null,
        },
      ],
    });
    // Due at 08:10; "now" is 08:20, i.e. 10 minutes overdue.
    const result = runner.run(['--compact', '--write-state', '--now', '2026-05-07T08:20:00.000Z']);
    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.result.emittedCount, 1);
    assert.equal(payload.result.evaluations[0].action, 'emit_external_evidence');
    assert.equal(payload.result.evaluations[0].minutesOverdue, 10);
    const evidenceFiles = runner.listEvidence();
    assert.equal(evidenceFiles.length, 1);
    // The artifact name is the run timestamp (colons and milliseconds
    // removed) followed by the watchdog id.
    assert.equal(evidenceFiles[0], '2026-05-07T082000Z-reporting-governance-plugin-watchdog.json');
    const evidence = JSON.parse(readFileSync(path.join(runner.evidenceDir, evidenceFiles[0]), 'utf8'));
    assert.equal(evidence.generatedAt, '2026-05-07T08:20:00.000Z');
    assert.equal(evidence.watchdog.id, 'reporting-governance-plugin-watchdog');
    assert.equal(evidence.evaluation.action, 'emit_external_evidence');
    const nextState = runner.readState();
    assert.equal(nextState.watchdogs[0].lastAlertAt, '2026-05-07T08:20:00.000Z');
  } finally {
    runner.cleanup();
  }
});
// Once lastAlertAt falls at or after the due time, further runs for the same
// milestone must stay silent and leave the recorded alert time alone.
test('same interval is not alerted twice once lastAlertAt covers the overdue window', () => {
  const runner = createFixtureRunner();
  try {
    runner.writeState({
      version: 1,
      watchdogs: [
        {
          id: 'reporting-governance-plugin-watchdog',
          task: 'reporting-governance plugin spec development',
          status: 'active',
          intervalMinutes: 10,
          lastMilestoneAt: '2026-05-07T08:00:00.000Z',
          lastAlertAt: '2026-05-07T08:12:00.000Z',
        },
      ],
    });
    // Due at 08:10, alerted at 08:12, evaluated at 08:15.
    const result = runner.run(['--compact', '--write-state', '--now', '2026-05-07T08:15:00.000Z']);
    assert.equal(result.status, 0, result.stderr);
    const payload = JSON.parse(result.stdout);
    assert.equal(payload.result.emittedCount, 0);
    // Without this, the test would also pass if the watchdog were wrongly
    // judged invalid or still within its interval.
    assert.equal(payload.result.overdueCount, 1);
    assert.equal(payload.result.evaluations[0].action, 'already_alerted_this_interval');
    assert.deepEqual(runner.listEvidence(), []);
    assert.equal(runner.readState().watchdogs[0].lastAlertAt, '2026-05-07T08:12:00.000Z');
  } finally {
    runner.cleanup();
  }
});
// Run every registered test in registration order, printing one "ok" /
// "not ok" line per test, and signal failure through the exit code.
let failures = 0;
for (const { name, fn } of tests) {
  try {
    // Awaiting lets a test body return a promise: a rejection is then counted
    // as a failure instead of being reported as "ok" and surfacing later as
    // an unhandled rejection. Synchronous tests are unaffected.
    await fn();
    printResult('ok', name);
  } catch (error) {
    failures += 1;
    printResult('not ok', name, `- ${error instanceof Error ? error.message : String(error)}`);
  }
}
if (failures > 0) {
  // Set the exit code and let the process end on its own so result lines
  // still buffered on a piped stdout are flushed rather than cut off.
  process.exitCode = 1;
}

View File

@@ -0,0 +1 @@
# Every 10 minutes: from the worktree root, run the long-task watchdog runner
# with --write-state against memory/watchdog-state.json, write evidence into
# state/long-task-watchdog, and append the runner's stdout/stderr to cron.log.
*/10 * * * * cd "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin" && /usr/bin/env node "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/scripts/long_task_watchdog.mjs" --write-state --state "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/memory/watchdog-state.json" --evidence-dir "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog" >> "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog/cron.log" 2>&1

View File

@@ -0,0 +1,30 @@
{
"generatedAt": "2026-05-07T09:00:00.000Z",
"tool": "long_task_watchdog",
"watchdog": {
"id": "reporting-governance-plugin-watchdog",
"task": "reporting-governance plugin spec development",
"ownerSession": "main-telegram-eric",
"ownerSessionKey": "agent:coder:main",
"reportChannel": "telegram",
"reportTarget": "864811879",
"intervalMinutes": 10,
"lastMilestoneAt": "2026-05-07T16:46:00+08:00",
"lastAlertAt": null
},
"evaluation": {
"watchdogId": "reporting-governance-plugin-watchdog",
"id": "reporting-governance-plugin-watchdog",
"active": true,
"overdue": true,
"action": "emit_external_evidence",
"reason": "active watchdog is overdue and has not been externally evidenced for this interval",
"dueAt": "2026-05-07T08:56:00.000Z",
"minutesOverdue": 4
},
"nextExpectedExternalAction": [
"nudge owner session",
"report owner-visible checkpoint",
"or respawn / inspect locally if owner appears stalled"
]
}

View File

@@ -0,0 +1,19 @@
# Long-task watchdog evidence
This directory stores file-backed external evidence produced by `scripts/long_task_watchdog.mjs`.
## Purpose
The original watchdog chain had state registration but no active external executor.
This directory is the minimal proof surface for the rebuilt execution chain:
- each run that finds an active watchdog overdue, and not yet alerted for the current interval, writes a timestamped evidence artifact
- cron can append to `cron.log`
- `memory/watchdog-state.json` records `lastAlertAt` when the runner is invoked with `--write-state`
## Expected files
- `cron.log` — append-only stdout/stderr from cron-triggered runs
- `<timestamp>-<watchdog-id>.json` — overdue evidence artifact per emitted interval
These artifacts are meant to be machine-verifiable and safe to inspect from the repo worktree.