fix: restore minimal long-task watchdog execution chain
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"version": 7,
|
||||
"version": 8,
|
||||
"watchdogs": [
|
||||
{
|
||||
"id": "paperclip-bootstrap-watchdog",
|
||||
@@ -33,6 +33,26 @@
|
||||
"lastNudgeAt": "2026-04-21T18:33:00+08:00",
|
||||
"escalationPolicy": "nudge-owner-then-report",
|
||||
"notes": "已依 Eric 總管指示關閉 long-task watchdog:recurring cron 已停用,這筆 watchdog 改為 paused,不再自動催辦或回報。"
|
||||
},
|
||||
{
|
||||
"id": "reporting-governance-plugin-watchdog",
|
||||
"task": "reporting-governance plugin spec development",
|
||||
"status": "active",
|
||||
"ownerSession": "main-telegram-eric",
|
||||
"ownerSessionKey": "agent:coder:main",
|
||||
"ownerAgentId": "coder",
|
||||
"channel": "telegram",
|
||||
"target": "864811879",
|
||||
"reportChannel": "telegram",
|
||||
"reportTarget": "864811879",
|
||||
"intervalMinutes": 10,
|
||||
"startedAt": "2026-05-07T16:46:00+08:00",
|
||||
"lastMilestoneAt": "2026-05-07T16:46:00+08:00",
|
||||
"lastAlertAt": "2026-05-07T09:00:00.000Z",
|
||||
"lastObservedActivityAt": "2026-05-07T16:46:00+08:00",
|
||||
"lastNudgeAt": null,
|
||||
"escalationPolicy": "nudge-owner-then-report",
|
||||
"notes": "恢復最小外部巡查鏈:由 long_task_watchdog runner + 每 10 分鐘 cron snippet 產生外部 evidence;若要接回真正 sessions/message 巡查,再由上層執行器接手。"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
18
scripts/install_long_task_watchdog_cron.sh
Executable file
18
scripts/install_long_task_watchdog_cron.sh
Executable file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash
# Generate the cron snippet that runs the long-task watchdog every 10 minutes.
#
# The snippet is written to state/cron/long-task-watchdog.cron under the repo
# root; this script does NOT modify the user's crontab. It prints the command
# that installs the snippet instead.
set -euo pipefail

ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
CRON_FILE="$ROOT_DIR/state/cron/long-task-watchdog.cron"
LOG_DIR="$ROOT_DIR/state/long-task-watchdog"
RUNNER="$ROOT_DIR/scripts/long_task_watchdog.mjs"
STATE_FILE="$ROOT_DIR/memory/watchdog-state.json"

# cron starts jobs with a minimal PATH, so a node binary that is only reachable
# through the interactive shell's PATH would not be found at run time. Pin the
# absolute path when node is resolvable now; otherwise keep the PATH lookup.
if NODE_BIN="$(command -v node)"; then
  NODE_CMD="\"$NODE_BIN\""
else
  NODE_CMD="/usr/bin/env node"
fi

mkdir -p "$(dirname "$CRON_FILE")" "$LOG_DIR"

# Unquoted heredoc on purpose: the variables are expanded now so the cron line
# contains absolute paths.
cat >"$CRON_FILE" <<EOF
*/10 * * * * cd "$ROOT_DIR" && $NODE_CMD "$RUNNER" --write-state --state "$STATE_FILE" --evidence-dir "$LOG_DIR" >> "$LOG_DIR/cron.log" 2>&1
EOF

printf 'Wrote cron snippet: %s\n' "$CRON_FILE"
printf 'To install for current user, run:\n'
# Drop any existing entry for this runner first so that re-running the install
# command does not schedule the watchdog twice.
printf ' (crontab -l 2>/dev/null | grep -vF "%s"; cat "%s") | crontab -\n' "$RUNNER" "$CRON_FILE"
|
||||
263
scripts/long_task_watchdog.mjs
Executable file
263
scripts/long_task_watchdog.mjs
Executable file
@@ -0,0 +1,263 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import fs from 'node:fs';
import path from 'node:path';
import process from 'node:process';
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Directory containing this script. `import.meta.dirname` is only defined on
// newer Node.js releases; on older runtimes it is undefined and
// `path.resolve(undefined, '..')` would throw, so derive the directory from
// `import.meta.url` in that case.
const SCRIPT_DIR = import.meta.dirname ?? path.dirname(fileURLToPath(import.meta.url));
// Repository root: the parent of the scripts directory.
const ROOT_DIR = path.resolve(SCRIPT_DIR, '..');
// Default location of the watchdog state file (overridable with --state).
const DEFAULT_STATE_PATH = path.join(ROOT_DIR, 'memory', 'watchdog-state.json');
// Default directory for evidence artifacts (overridable with --evidence-dir).
const DEFAULT_EVIDENCE_DIR = path.join(ROOT_DIR, 'state', 'long-task-watchdog');
|
||||
|
||||
/**
 * Parse the watchdog CLI arguments.
 *
 * Recognised options:
 *   --compact, --write-state, --help / -h        boolean flags
 *   --state, --now, --evidence-dir               take a value, either as the
 *                                                next token or as `--opt=value`
 *
 * Unrecognised tokens are ignored. A value option with no usable value keeps
 * its default (`--now` resets to null). A value option never consumes a
 * following recognised option as its value, so `--state --write-state` still
 * enables `writeState` instead of treating "--write-state" as the state path.
 *
 * @param {string[]} argv - Arguments after the script name.
 * @returns {{compact: boolean, state: string, now: (string|null),
 *   evidenceDir: string, writeState: boolean, help: boolean}}
 */
function parseArgs(argv) {
  const args = {
    compact: false,
    state: DEFAULT_STATE_PATH,
    now: null,
    evidenceDir: DEFAULT_EVIDENCE_DIR,
    writeState: false,
    help: false,
  };

  // Maps are used instead of plain objects so that tokens such as
  // "constructor" cannot match an inherited property.
  const booleanOptions = new Map([
    ['--compact', 'compact'],
    ['--write-state', 'writeState'],
    ['--help', 'help'],
    ['-h', 'help'],
  ]);
  const valueOptions = new Map([
    ['--state', 'state'],
    ['--now', 'now'],
    ['--evidence-dir', 'evidenceDir'],
  ]);

  const isOptionToken = (candidate) =>
    booleanOptions.has(candidate) ||
    [...valueOptions.keys()].some(
      (flag) => candidate === flag || candidate.startsWith(`${flag}=`),
    );

  // What a value option falls back to when no value is supplied: `--now`
  // resets to null, the path options keep whatever is currently set.
  const fallbackFor = (key) => (key === 'now' ? null : args[key]);

  for (let i = 0; i < argv.length; i += 1) {
    const token = argv[i];

    const booleanKey = booleanOptions.get(token);
    if (booleanKey !== undefined) {
      args[booleanKey] = true;
      continue;
    }

    const separateKey = valueOptions.get(token);
    if (separateKey !== undefined) {
      const next = argv[i + 1];
      if (next == null || isOptionToken(next)) {
        // Missing value: leave the next token for the loop to handle.
        args[separateKey] = fallbackFor(separateKey);
        continue;
      }
      args[separateKey] = next;
      i += 1;
      continue;
    }

    const equalsAt = token.indexOf('=');
    if (equalsAt > 0) {
      const inlineKey = valueOptions.get(token.slice(0, equalsAt));
      if (inlineKey !== undefined) {
        args[inlineKey] = token.slice(equalsAt + 1) || fallbackFor(inlineKey);
      }
    }
  }

  return args;
}
|
||||
|
||||
/**
 * Write the CLI usage text to stdout: the usage line, a blank line, and a
 * one-line description, terminated by a newline.
 */
function printHelp() {
  const usage = 'Usage: node scripts/long_task_watchdog.mjs [--compact] [--write-state] [--state <path>] [--now <iso>] [--evidence-dir <path>]';
  const description = 'Minimal file-backed long-task watchdog runner.';
  process.stdout.write(`${usage}\n\n${description}\n`);
}
|
||||
|
||||
/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read, or (SyntaxError) if it is not valid JSON.
 */
function parseJsonFile(filePath) {
  const text = fs.readFileSync(filePath, { encoding: 'utf8' });
  const parsed = JSON.parse(text);
  return parsed;
}
|
||||
|
||||
/**
 * Convert a timestamp string to epoch milliseconds.
 *
 * @param {*} value - Candidate timestamp; only non-empty strings are parsed.
 * @returns {number|null} Result of `Date.parse`, or null when `value` is not a
 *   non-empty string or cannot be parsed.
 */
function parseTime(value) {
  const isNonEmptyString = typeof value === 'string' && value.length > 0;
  if (!isNonEmptyString) {
    return null;
  }

  const epochMs = Date.parse(value);
  if (Number.isNaN(epochMs)) {
    return null;
  }
  return epochMs;
}
|
||||
|
||||
/**
 * Format a date value as an ISO-8601 UTC string.
 *
 * @param {number|string|Date} value - Anything accepted by the `Date` constructor.
 * @returns {string} The value of `Date.prototype.toISOString()`.
 * @throws {RangeError} If `value` does not produce a valid date.
 */
function toIso(value) {
  const moment = new Date(value);
  return moment.toISOString();
}
|
||||
|
||||
/**
 * Turn a value into a string usable as a file-name fragment.
 *
 * Runs of characters outside `[a-zA-Z0-9._-]` become a single dash, leading
 * and trailing dashes are stripped, and the result is cut to 80 characters.
 * Falsy input, or input that sanitises to an empty string, yields 'watchdog'.
 *
 * @param {*} value - Value to sanitise (stringified with `String`).
 * @returns {string} The sanitised, non-empty name.
 */
function toSafeName(value) {
  const fallback = 'watchdog';
  const text = String(value || fallback);
  const dashed = text.replace(/[^a-zA-Z0-9._-]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  const clipped = trimmed.slice(0, 80);
  return clipped || fallback;
}
|
||||
|
||||
/**
 * Decide what the runner should do for one watchdog entry at time `nowMs`.
 *
 * Possible `action` values in the result:
 *   - 'skip_inactive'                  status is not 'active'
 *   - 'invalid_contract'               interval is not positive or
 *                                      lastMilestoneAt is missing/unparseable
 *   - 'within_interval'                now is before lastMilestoneAt + interval
 *   - 'already_alerted_this_interval'  overdue, but lastAlertAt is at or after
 *                                      the due time
 *   - 'emit_external_evidence'         overdue and not yet alerted
 *
 * @param {object|null|undefined} watchdog - Entry from the state file.
 * @param {number} nowMs - Current time in epoch milliseconds.
 * @returns {object} `{ id, active, overdue, action, reason }`, plus `dueAt`
 *   (ISO string) and `minutesOverdue` once a due time could be computed.
 */
function evaluateWatchdog(watchdog, nowMs) {
  const id = watchdog?.id ?? null;
  // Builds every result with the same key order so serialized output is stable.
  const verdict = (active, overdue, action, reason, extras = {}) => ({
    id,
    active,
    overdue,
    action,
    reason,
    ...extras,
  });

  if (watchdog?.status !== 'active') {
    return verdict(false, false, 'skip_inactive', 'watchdog is not active');
  }

  // Accept a numeric interval as-is; anything else goes through parseInt.
  const rawInterval = watchdog?.intervalMinutes;
  const minutes = Number.isFinite(rawInterval)
    ? rawInterval
    : Number.parseInt(String(rawInterval ?? '0'), 10);
  const intervalMs = minutes > 0 ? minutes * 60 * 1000 : 0;
  const milestoneMs = parseTime(watchdog?.lastMilestoneAt);

  if (!intervalMs || milestoneMs === null) {
    return verdict(
      true,
      false,
      'invalid_contract',
      'intervalMinutes or lastMilestoneAt is missing/invalid',
    );
  }

  const dueAtMs = milestoneMs + intervalMs;
  const timing = (minutesOverdue) => ({ dueAt: toIso(dueAtMs), minutesOverdue });

  const isOverdue = nowMs >= dueAtMs;
  if (!isOverdue) {
    return verdict(
      true,
      false,
      'within_interval',
      'last milestone is still within interval',
      timing(0),
    );
  }

  const minutesLate = Math.floor((nowMs - dueAtMs) / 60000);
  const lastAlertMs = parseTime(watchdog?.lastAlertAt);
  const alertedSinceDue = lastAlertMs !== null && lastAlertMs >= dueAtMs;

  if (alertedSinceDue) {
    return verdict(
      true,
      true,
      'already_alerted_this_interval',
      'lastAlertAt already covers current overdue interval',
      timing(minutesLate),
    );
  }

  return verdict(
    true,
    true,
    'emit_external_evidence',
    'active watchdog is overdue and has not been externally evidenced for this interval',
    timing(minutesLate),
  );
}
|
||||
|
||||
/**
 * Create `dirPath` and any missing parent directories.
 *
 * @param {string} dirPath - Directory that must exist afterwards.
 */
function ensureDir(dirPath) {
  const options = { recursive: true };
  fs.mkdirSync(dirPath, options);
}
|
||||
|
||||
/**
 * Write one evidence artifact for an overdue watchdog.
 *
 * The file is named `<timestamp>-<safe watchdog id>.json`, where the timestamp
 * is `nowIso` with colons removed and the `.mmmZ` suffix reduced to `Z`. The
 * payload is pretty-printed JSON followed by a newline.
 *
 * @param {string} evidenceDir - Target directory; created if missing.
 * @param {object} watchdog - Watchdog entry the evidence is about.
 * @param {object} evaluation - Evaluation result embedded verbatim.
 * @param {string} nowIso - ISO timestamp used for the file name and `generatedAt`.
 * @returns {string} Path of the written file.
 */
function writeEvidence(evidenceDir, watchdog, evaluation, nowIso) {
  ensureDir(evidenceDir);

  const stamp = nowIso.replace(/[:]/g, '').replace(/\.\d{3}Z$/, 'Z');
  const filePath = path.join(evidenceDir, `${stamp}-${toSafeName(watchdog.id)}.json`);

  // Snapshot of the watchdog fields at the time of the alert. The report
  // channel/target fall back to the plain channel/target when unset.
  const watchdogSummary = {
    id: watchdog.id,
    task: watchdog.task,
    ownerSession: watchdog.ownerSession ?? null,
    ownerSessionKey: watchdog.ownerSessionKey ?? null,
    reportChannel: watchdog.reportChannel ?? watchdog.channel ?? null,
    reportTarget: watchdog.reportTarget ?? watchdog.target ?? null,
    intervalMinutes: watchdog.intervalMinutes,
    lastMilestoneAt: watchdog.lastMilestoneAt ?? null,
    lastAlertAt: watchdog.lastAlertAt ?? null,
  };
  const nextExpectedExternalAction = [
    'nudge owner session',
    'report owner-visible checkpoint',
    'or respawn / inspect locally if owner appears stalled',
  ];

  const serialized = JSON.stringify(
    {
      generatedAt: nowIso,
      tool: 'long_task_watchdog',
      watchdog: watchdogSummary,
      evaluation,
      nextExpectedExternalAction,
    },
    null,
    2,
  );
  fs.writeFileSync(filePath, `${serialized}\n`, 'utf8');
  return filePath;
}
|
||||
|
||||
/**
 * Replace `filePath` with `contents` without ever exposing a partial file.
 *
 * The data is written to a sibling temp file which is then renamed over the
 * target, so a reader that opens the state file while this runs sees either
 * the old or the new contents rather than a truncated file. The temp file is
 * removed if either step fails.
 *
 * @param {string} filePath - File to replace.
 * @param {string} contents - New UTF-8 contents.
 */
function writeFileAtomic(filePath, contents) {
  const tempPath = `${filePath}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tempPath, contents, 'utf8');
    fs.renameSync(tempPath, filePath);
  } catch (error) {
    fs.rmSync(tempPath, { force: true });
    throw error;
  }
}

/**
 * CLI entry point.
 *
 * Loads the state file, evaluates every watchdog at "now" (or `--now`), writes
 * an evidence file for each watchdog whose action is 'emit_external_evidence',
 * optionally persists the updated `lastAlertAt` values (`--write-state`), and
 * prints a JSON summary to stdout.
 *
 * Exits with status 1 and a one-line message on stderr when `--now` is invalid
 * or the state file cannot be read or parsed.
 */
function main() {
  const args = parseArgs(process.argv.slice(2));
  if (args.help) {
    printHelp();
    process.exit(0);
  }

  const nowMs = args.now ? parseTime(args.now) : Date.now();
  if (nowMs === null) {
    process.stderr.write('Invalid --now value\n');
    process.exit(1);
  }
  const nowIso = toIso(nowMs);

  // A missing or corrupt state file is an expected operational failure; report
  // it as a single log line naming the file instead of an uncaught stack trace.
  let state;
  try {
    state = parseJsonFile(args.state);
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    process.stderr.write(`Cannot read watchdog state ${args.state}: ${detail}\n`);
    process.exit(1);
  }

  // `state?.` tolerates a state file whose top-level JSON value is `null`.
  const watchdogs = Array.isArray(state?.watchdogs) ? state.watchdogs : [];
  const evaluations = watchdogs.map((watchdog) => ({
    watchdogId: watchdog?.id ?? null,
    ...evaluateWatchdog(watchdog, nowMs),
  }));

  const evidenceWrites = [];
  const nextWatchdogs = watchdogs.map((watchdog, index) => {
    const evaluation = evaluations[index];
    if (evaluation.action !== 'emit_external_evidence') {
      return watchdog;
    }
    const evidencePath = writeEvidence(args.evidenceDir, watchdog, evaluation, nowIso);
    evidenceWrites.push({ watchdogId: watchdog.id, path: evidencePath });
    return {
      ...watchdog,
      lastAlertAt: nowIso,
      lastObservedActivityAt: watchdog.lastObservedActivityAt ?? watchdog.lastMilestoneAt ?? null,
      lastNudgeAt: watchdog.lastNudgeAt ?? null,
    };
  });

  // Only touch the state file when a watchdog actually changed. Rewriting it
  // on every run would needlessly risk overwriting an edit made by another
  // writer between our read and our write.
  if (args.writeState && evidenceWrites.length > 0) {
    const nextState = {
      ...state,
      watchdogs: nextWatchdogs,
    };
    writeFileAtomic(args.state, `${JSON.stringify(nextState, null, 2)}\n`);
  }

  const response = {
    ok: true,
    tool: 'long_task_watchdog',
    version: 'mvp-v1',
    statePath: path.resolve(args.state),
    evidenceDir: path.resolve(args.evidenceDir),
    now: nowIso,
    writeState: args.writeState,
    result: {
      activeCount: watchdogs.filter((item) => item?.status === 'active').length,
      overdueCount: evaluations.filter((item) => item.overdue === true).length,
      emittedCount: evidenceWrites.length,
      evaluations,
      evidenceWrites,
    },
  };

  process.stdout.write(`${JSON.stringify(response, null, args.compact ? 0 : 2)}\n`);
}
|
||||
|
||||
main();
|
||||
159
scripts/test_long_task_watchdog.mjs
Executable file
159
scripts/test_long_task_watchdog.mjs
Executable file
@@ -0,0 +1,159 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import assert from 'node:assert/strict';
|
||||
import { mkdtempSync, mkdirSync, readFileSync, rmSync, writeFileSync, readdirSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import path from 'node:path';
|
||||
import process from 'node:process';
|
||||
import { spawnSync } from 'node:child_process';
|
||||
|
||||
const ROOT_DIR = path.resolve(import.meta.dirname, '..');
|
||||
const WATCHDOG_SCRIPT = path.join(ROOT_DIR, 'scripts', 'long_task_watchdog.mjs');
|
||||
|
||||
/**
 * Create an isolated fixture for one test: a fresh temp directory holding a
 * state file path and an (initially empty) evidence directory.
 *
 * @returns {object} Fixture with:
 *   - statePath, evidenceDir: locations inside the temp directory
 *   - writeState(content): write a string as-is, or any other value as
 *     pretty-printed JSON, to the state file; returns statePath
 *   - run(args): spawn the watchdog script against the fixture and return
 *     `{ status, stdout, stderr }`
 *   - readState(): parse the state file as JSON
 *   - listEvidence(): sorted file names in the evidence directory
 *   - cleanup(): remove the temp directory
 */
function createFixtureRunner() {
  const fixtureRoot = mkdtempSync(path.join(tmpdir(), 'long-task-watchdog-test-'));
  const statePath = path.join(fixtureRoot, 'watchdog-state.json');
  const evidenceDir = path.join(fixtureRoot, 'evidence');
  mkdirSync(evidenceDir, { recursive: true });

  // None of the methods use `this`, so they stay usable after destructuring.
  return {
    statePath,
    evidenceDir,

    writeState(content) {
      let body = content;
      if (typeof content !== 'string') {
        body = JSON.stringify(content, null, 2);
      }
      writeFileSync(statePath, body);
      return statePath;
    },

    run(args = []) {
      const fixtureArgs = ['--state', statePath, '--evidence-dir', evidenceDir];
      const child = spawnSync(process.execPath, [WATCHDOG_SCRIPT, ...fixtureArgs, ...args], {
        cwd: ROOT_DIR,
        encoding: 'utf8',
      });
      const { status } = child;
      return {
        status,
        stdout: child.stdout ?? '',
        stderr: child.stderr ?? '',
      };
    },

    readState() {
      const raw = readFileSync(statePath, 'utf8');
      return JSON.parse(raw);
    },

    listEvidence() {
      const names = readdirSync(evidenceDir);
      return names.sort();
    },

    cleanup() {
      rmSync(fixtureRoot, { recursive: true, force: true });
    },
  };
}
|
||||
|
||||
// Registered test cases, in registration order.
const tests = [];

/**
 * Register a test case to be executed later by the run loop.
 *
 * @param {string} name - Label printed with the result.
 * @param {Function} fn - Test body; throwing marks the test as failed.
 */
function test(name, fn) {
  const entry = { name, fn };
  tests.push(entry);
}
|
||||
|
||||
/**
 * Print one result line to stdout as `<prefix> <name>[ <detail>]`.
 *
 * @param {string} prefix - Status marker, e.g. 'ok' or 'not ok'.
 * @param {string} name - Test name.
 * @param {string} [detail=''] - Extra text; appended after a space when truthy.
 */
function printResult(prefix, name, detail = '') {
  let suffix = '';
  if (detail) {
    suffix = ` ${detail}`;
  }
  process.stdout.write(`${prefix} ${name}${suffix}\n`);
}
|
||||
|
||||
/**
 * Run `body` with a fresh fixture and always remove the fixture's temp
 * directory afterwards, whether or not `body` throws.
 */
function withFixture(body) {
  const runner = createFixtureRunner();
  try {
    body(runner);
  } finally {
    runner.cleanup();
  }
}

/**
 * Run the watchdog with `--compact` followed by `extraArgs`, assert that it
 * exited with status 0, and return the parsed JSON it printed on stdout.
 */
function runCompact(runner, extraArgs) {
  const result = runner.run(['--compact', ...extraArgs]);
  assert.equal(result.status, 0, result.stderr);
  return JSON.parse(result.stdout);
}

test('inactive watchdogs do not emit evidence', () => {
  withFixture((runner) => {
    const pausedWatchdog = {
      id: 'paused-watchdog',
      task: 'paused task',
      status: 'paused',
      intervalMinutes: 10,
      lastMilestoneAt: '2026-05-07T08:00:00.000Z',
    };
    runner.writeState({ version: 1, watchdogs: [pausedWatchdog] });

    const payload = runCompact(runner, ['--now', '2026-05-07T08:20:00.000Z']);

    assert.equal(payload.result.emittedCount, 0);
    assert.deepEqual(runner.listEvidence(), []);
  });
});

test('overdue active watchdog emits external evidence and updates lastAlertAt when write-state is enabled', () => {
  withFixture((runner) => {
    const overdueWatchdog = {
      id: 'reporting-governance-plugin-watchdog',
      task: 'reporting-governance plugin spec development',
      status: 'active',
      ownerSessionKey: 'agent:coder:main',
      reportChannel: 'telegram',
      reportTarget: '864811879',
      intervalMinutes: 10,
      lastMilestoneAt: '2026-05-07T08:00:00.000Z',
      lastAlertAt: null,
    };
    runner.writeState({ version: 1, watchdogs: [overdueWatchdog] });

    const payload = runCompact(runner, ['--write-state', '--now', '2026-05-07T08:20:00.000Z']);

    assert.equal(payload.result.emittedCount, 1);
    const evidenceFiles = runner.listEvidence();
    assert.equal(evidenceFiles.length, 1);

    // With --write-state the alert time must be persisted back to the state file.
    const nextState = runner.readState();
    assert.equal(nextState.watchdogs[0].lastAlertAt, '2026-05-07T08:20:00.000Z');
  });
});

test('same interval is not alerted twice once lastAlertAt covers the overdue window', () => {
  withFixture((runner) => {
    // Due at 08:10 and already alerted at 08:12, so a run at 08:15 must stay quiet.
    const alertedWatchdog = {
      id: 'reporting-governance-plugin-watchdog',
      task: 'reporting-governance plugin spec development',
      status: 'active',
      intervalMinutes: 10,
      lastMilestoneAt: '2026-05-07T08:00:00.000Z',
      lastAlertAt: '2026-05-07T08:12:00.000Z',
    };
    runner.writeState({ version: 1, watchdogs: [alertedWatchdog] });

    const payload = runCompact(runner, ['--write-state', '--now', '2026-05-07T08:15:00.000Z']);

    assert.equal(payload.result.emittedCount, 0);
    assert.deepEqual(runner.listEvidence(), []);
  });
});
|
||||
|
||||
/**
 * Execute one registered test and print its result line.
 *
 * @returns {boolean} true when the test body completed without throwing.
 */
function runRegisteredTest({ name, fn }) {
  try {
    fn();
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    printResult('not ok', name, `- ${message}`);
    return false;
  }
  printResult('ok', name);
  return true;
}

// Run every registered test in order and count the failures.
let failures = 0;
for (const entry of tests) {
  const passed = runRegisteredTest(entry);
  if (!passed) {
    failures += 1;
  }
}

// A non-zero exit status signals that at least one test failed.
if (failures > 0) {
  process.exit(1);
}
|
||||
1
state/cron/long-task-watchdog.cron
Normal file
1
state/cron/long-task-watchdog.cron
Normal file
@@ -0,0 +1 @@
|
||||
*/10 * * * * cd "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin" && /usr/bin/env node "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/scripts/long_task_watchdog.mjs" --write-state --state "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/memory/watchdog-state.json" --evidence-dir "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog" >> "/home/alice/.openclaw/workspace/.worktrees/reporting-governance-plugin/state/long-task-watchdog/cron.log" 2>&1
|
||||
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"generatedAt": "2026-05-07T09:00:00.000Z",
|
||||
"tool": "long_task_watchdog",
|
||||
"watchdog": {
|
||||
"id": "reporting-governance-plugin-watchdog",
|
||||
"task": "reporting-governance plugin spec development",
|
||||
"ownerSession": "main-telegram-eric",
|
||||
"ownerSessionKey": "agent:coder:main",
|
||||
"reportChannel": "telegram",
|
||||
"reportTarget": "864811879",
|
||||
"intervalMinutes": 10,
|
||||
"lastMilestoneAt": "2026-05-07T16:46:00+08:00",
|
||||
"lastAlertAt": null
|
||||
},
|
||||
"evaluation": {
|
||||
"watchdogId": "reporting-governance-plugin-watchdog",
|
||||
"id": "reporting-governance-plugin-watchdog",
|
||||
"active": true,
|
||||
"overdue": true,
|
||||
"action": "emit_external_evidence",
|
||||
"reason": "active watchdog is overdue and has not been externally evidenced for this interval",
|
||||
"dueAt": "2026-05-07T08:56:00.000Z",
|
||||
"minutesOverdue": 4
|
||||
},
|
||||
"nextExpectedExternalAction": [
|
||||
"nudge owner session",
|
||||
"report owner-visible checkpoint",
|
||||
"or respawn / inspect locally if owner appears stalled"
|
||||
]
|
||||
}
|
||||
19
state/long-task-watchdog/README.md
Normal file
19
state/long-task-watchdog/README.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Long-task watchdog evidence
|
||||
|
||||
This directory stores file-backed external evidence produced by `scripts/long_task_watchdog.mjs`.
|
||||
|
||||
## Purpose
|
||||
|
||||
The original watchdog chain had state registration but no active external executor.
|
||||
This directory is the minimal proof surface for the rebuilt execution chain:
|
||||
|
||||
- each overdue run writes a timestamped evidence artifact
|
||||
- cron can append to `cron.log`
|
||||
- `memory/watchdog-state.json` records `lastAlertAt`
|
||||
|
||||
## Expected files
|
||||
|
||||
- `cron.log` — append-only stdout/stderr from cron-triggered runs
|
||||
- `<timestamp>-<watchdog-id>.json` — overdue evidence artifact per emitted interval
|
||||
|
||||
These artifacts are meant to be machine-verifiable and safe to inspect from the repo worktree.
|
||||
Reference in New Issue
Block a user