Initial import of watchdog-discord-route skill

This commit is contained in:
Alice
2026-04-22 08:33:51 +08:00
commit 8138fb011d
22 changed files with 2447 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env bash
# Pre-flight ("bootstrap") check for the watchdog-discord-route bundle.
# Every location below can be overridden through a WATCHDOG_B_* environment
# variable; the value after ":-" is the fallback used when it is unset or empty.
set -euo pipefail
# Directory holding this script, and the skill root one level above it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
# Abort immediately with a message when HOME is unset or empty.
HOME_DIR="${HOME:?HOME is required}"
WORKSPACE_DEFAULT="$HOME_DIR/.openclaw/workspace"
WORKSPACE="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
# Live (installed) locations that are checked further down.
LIVE_SCRIPT_DIR="${WATCHDOG_B_LIVE_SCRIPT_DIR:-$WORKSPACE/scripts/watchdog-b}"
SYSTEMD_USER_DIR="${WATCHDOG_B_SYSTEMD_USER_DIR:-$HOME_DIR/.config/systemd/user}"
CONFIG_DIR="${WATCHDOG_B_CONFIG_DIR:-$HOME_DIR/.config/openclaw}"
CONFIG_FILE="${WATCHDOG_B_CONFIG_FILE:-$CONFIG_DIR/watchdog-b.env}"
# Runtime probe script; by default the copy bundled next to this script.
PROBE_SCRIPT="${WATCHDOG_B_RUNTIME_PROBE:-$SCRIPT_DIR/openclaw_runtime_probe.py}"
# Runtime paths start from the environment (possibly empty); probe_runtime
# overwrites them with whatever the probe reports.
NODE_BIN_RAW="${WATCHDOG_B_NODE_BIN:-}"
OPENCLAW_MJS="${WATCHDOG_B_OPENCLAW_MJS:-}"
OPENCLAW_ENTRY="${WATCHDOG_B_OPENCLAW_ENTRY:-}"
# Owner-report helper scripts, expected inside the live script dir by default.
OWNER_REPORT_PRODUCER="${WATCHDOG_B_OWNER_PRODUCER:-$LIVE_SCRIPT_DIR/owner_report_producer.py}"
OWNER_REPORT_DRIVER="${WATCHDOG_B_OWNER_DRIVER:-$LIVE_SCRIPT_DIR/owner_report_driver.py}"
OWNER_REPORT_CONSUMER_DEFAULT="$LIVE_SCRIPT_DIR/owner_report_consumer.py"
OWNER_REPORT_CONSUMER="${WATCHDOG_B_OWNER_REPORT_CONSUMER:-$OWNER_REPORT_CONSUMER_DEFAULT}"
# Number of failed checks so far; incremented by fail().
FAILURES=0
# Print a passing check line: "[PASS] <message>".
pass() {
  printf '[PASS] %s\n' "$*"
}
# Print a non-fatal warning line: "[WARN] <message>". Does not count as a failure.
warn() {
  printf '[WARN] %s\n' "$*"
}
# Print a failing check line "[FAIL] <message>" and bump the global FAILURES counter.
fail() {
  printf '[FAIL] %s\n' "$*"
  FAILURES=$(( FAILURES + 1 ))
}
# Report whether a filesystem path exists (any file type).
# Arguments: $1 - path to test, $2 - human-readable label for the report line
# Outputs:   one [PASS] or [FAIL] line via pass/fail
check_exists() {
  local target="$1" what="$2"
  if [[ ! -e "$target" ]]; then
    fail "$what missing: $target"
    return
  fi
  pass "$what: $target"
}
# Verify that a command is runnable.
# Arguments: $1 - command name, or a path (anything containing a slash)
#            $2 - human-readable label for the report line
# Outputs:   one [PASS] or [FAIL] line via pass/fail
check_exec_path() {
  local raw="$1" label="$2"
  local resolved=""
  if [[ "$raw" == */* ]]; then
    resolved="$raw"
    # -x alone is also true for a searchable directory, so additionally
    # require a regular file before calling the path executable.
    if [[ -f "$resolved" && -x "$resolved" ]]; then
      pass "$label executable: $resolved"
    else
      fail "$label not executable: $resolved"
    fi
    return
  fi
  # Bare names are resolved the way the shell would resolve them.
  if resolved="$(command -v "$raw" 2>/dev/null)"; then
    pass "$label on PATH: $resolved"
  else
    fail "$label not found on PATH: $raw"
  fi
}
# Check that systemctl exists, that "systemctl --user" works at all, and
# whether the user bus can be reached from the current session.
# An unreachable bus is only a warning; the other two problems are failures.
check_systemd_user() {
  command -v systemctl >/dev/null 2>&1 || {
    fail "systemctl not found"
    return
  }

  if ! systemctl --user --version >/dev/null 2>&1; then
    fail "systemd --user unavailable"
  else
    pass "systemd --user command available"
  fi

  if ! systemctl --user show-environment >/dev/null 2>&1; then
    warn "systemd --user bus not reachable in current session"
  else
    pass "systemd --user bus reachable"
  fi
}
# Validate WATCHDOG_B_OWNER_REPORT_TARGET in the live env file ($CONFIG_FILE).
# Outputs: [WARN] when the file is absent or the value has an unusual shape,
#          [FAIL] when the key is missing or still the placeholder,
#          [PASS] for values starting with "channel:" or "user:".
check_env_target() {
  if [[ ! -f "$CONFIG_FILE" ]]; then
    warn "config file not present yet: $CONFIG_FILE"
    return
  fi
  local target="" line=""
  # The last assignment in the file wins. Everything after the first "=" is
  # kept, so values that themselves contain "=" are not truncated.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ "$line" == WATCHDOG_B_OWNER_REPORT_TARGET=* ]] || continue
    target="${line#*=}"
  done < "$CONFIG_FILE"
  # Drop all whitespace (this also removes a trailing CR).
  target="${target//[[:space:]]/}"
  # Remove one pair of surrounding quotes so a quoted value is judged by its content.
  if [[ ${#target} -ge 2 ]]; then
    if [[ "$target" == \"*\" || "$target" == \'*\' ]]; then
      target="${target:1:${#target}-2}"
    fi
  fi
  if [[ -z "$target" ]]; then
    fail "WATCHDOG_B_OWNER_REPORT_TARGET missing in $CONFIG_FILE"
  elif [[ "$target" == "channel:REPLACE_ME" ]]; then
    fail "WATCHDOG_B_OWNER_REPORT_TARGET still placeholder in $CONFIG_FILE"
  elif [[ "$target" == channel:* || "$target" == user:* ]]; then
    pass "WATCHDOG_B_OWNER_REPORT_TARGET looks configured: $target"
  else
    warn "WATCHDOG_B_OWNER_REPORT_TARGET present but format is unusual: $target"
  fi
}
# Run the runtime probe with --shell and copy the paths it reports into the
# globals NODE_BIN_RAW, OPENCLAW_MJS and OPENCLAW_ENTRY.
# Outputs: [FAIL] when the probe script is missing or exits non-zero,
#          otherwise one [PASS] line.
probe_runtime() {
  if [[ ! -f "$PROBE_SCRIPT" ]]; then
    fail "runtime probe missing: $PROBE_SCRIPT"
    return
  fi
  local probe_output=""
  if ! probe_output="$(python3 "$PROBE_SCRIPT" --shell 2>/dev/null)"; then
    fail "runtime probe failed; set WATCHDOG_B_NODE_BIN / WATCHDOG_B_OPENCLAW_MJS / WATCHDOG_B_OPENCLAW_ENTRY explicitly"
    return
  fi
  local key="" value=""
  while IFS='=' read -r key value; do
    # The probe's --shell mode is described as printing "shell export lines",
    # so accept both KEY=value and export KEY=value, and drop one pair of
    # surrounding quotes from the value.
    # NOTE(review): exact --shell output format is not visible here - confirm.
    key="${key#export }"
    key="${key//[[:space:]]/}"
    if [[ ${#value} -ge 2 ]]; then
      if [[ "$value" == \"*\" || "$value" == \'*\' ]]; then
        value="${value:1:${#value}-2}"
      fi
    fi
    case "$key" in
      WATCHDOG_B_NODE_BIN) NODE_BIN_RAW="$value" ;;
      WATCHDOG_B_OPENCLAW_MJS) OPENCLAW_MJS="$value" ;;
      WATCHDOG_B_OPENCLAW_ENTRY) OPENCLAW_ENTRY="$value" ;;
    esac
  done <<< "$probe_output"
  pass "runtime probe resolved node/openclaw paths"
}
# Resolve runtime paths through probe_runtime, then confirm that the openclaw
# entry file and the openclaw mjs file both exist as regular files.
check_message_cli() {
  probe_runtime
  local spec kind location
  for spec in "entry:$OPENCLAW_ENTRY" "mjs:$OPENCLAW_MJS"; do
    kind="${spec%%:*}"
    location="${spec#*:}"
    if [[ -z "$location" || ! -f "$location" ]]; then
      fail "openclaw $kind missing: ${location:-<unset>}"
    else
      pass "openclaw $kind present: $location"
    fi
  done
}
# ---- report header ---------------------------------------------------------
printf '%s\n' \
  "watchdog-discord-route bootstrap" \
  "- skill_dir: $SKILL_DIR" \
  "- workspace: $WORKSPACE" \
  "- live_script_dir: $LIVE_SCRIPT_DIR" \
  "- systemd_user_dir: $SYSTEMD_USER_DIR" \
  "- config_file: $CONFIG_FILE" \
  ""

# ---- files shipped next to this script -------------------------------------
echo "[bundle]"
bundle_items=(
  "check_openclaw_state.sh:bundled checker"
  "run_watchdog_b.sh:bundled runner"
  "notify_watchdog_b.py:bundled notifier"
  "openclaw_runtime_probe.py:bundled runtime probe"
  "openclaw-watchdog-b.service:bundled service"
  "openclaw-watchdog-b.timer:bundled timer"
  "watchdog-b.env.example:bundled env example"
)
for bundle_item in "${bundle_items[@]}"; do
  check_exists "$SCRIPT_DIR/${bundle_item%%:*}" "${bundle_item#*:}"
done
echo

# ---- installed (live) locations --------------------------------------------
echo "[workspace/live paths]"
check_exists "$WORKSPACE" "workspace"
check_exists "$LIVE_SCRIPT_DIR" "live script dir"
check_exists "$OWNER_REPORT_CONSUMER" "live owner_report_consumer.py"
check_exists "$OWNER_REPORT_PRODUCER" "live owner_report_producer.py"
check_exists "$OWNER_REPORT_DRIVER" "live owner_report_driver.py"
echo

# ---- runtime tooling -------------------------------------------------------
echo "[runtime]"
check_message_cli
if [[ -z "$NODE_BIN_RAW" ]]; then
  fail "node runtime unresolved"
else
  check_exec_path "$NODE_BIN_RAW" "node"
fi
check_exec_path "python3" "python3"
check_systemd_user
echo

# ---- minimal Discord routing config ----------------------------------------
echo "[discord-route minimal config]"
check_env_target

# Exit non-zero as soon as at least one check failed.
if (( FAILURES > 0 )); then
  echo
  fail "bootstrap failed with $FAILURES issue(s)"
  exit 1
fi
echo
pass "bootstrap checks passed"

68
scripts/check_openclaw_state.sh Executable file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env bash
set -euo pipefail
# Watchdog B MVP tri-state checker for OpenClaw main runtime.
# Output (stdout): exactly one token: running | stalled | idle
#
# Heuristic (MVP):
# - If openclaw.pid exists and process is alive => running unless logs are stale.
# - If process alive but log file hasn't changed for STALL_AFTER_SECONDS => stalled.
# - Otherwise => idle.
#
# Future extension point:
# - Replace/augment log-freshness with real main-agent session/ledger signals.
# Workspace root: WATCHDOG_B_WORKSPACE when set, otherwise the invoking
# user's ~/.openclaw/workspace (instead of one specific user's home directory).
workspace_dir="${WATCHDOG_B_WORKSPACE:-${HOME:-}/.openclaw/workspace}"
# OPENCLAW_PID_FILE / OPENCLAW_LOG_FILE override the derived locations.
PID_FILE_DEFAULT="${OPENCLAW_PID_FILE:-$workspace_dir/host-runtime/openclaw.pid}"
LOG_FILE_DEFAULT="${OPENCLAW_LOG_FILE:-$workspace_dir/logs/openclaw.log}"
STALL_AFTER_SECONDS="${STALL_AFTER_SECONDS:-1200}" # 20 minutes default
# Reference time for the log-age calculation, taken once per run.
NOW_EPOCH="$(date +%s)"
pid_file="$PID_FILE_DEFAULT"
log_file="$LOG_FILE_DEFAULT"
# Print a file's modification time as seconds since the epoch.
# Tries the GNU form (stat -c %Y) first and falls back to the BSD form
# (stat -f %m) when that fails.
# Arguments: $1 - path to inspect
get_mtime_epoch() {
  local target="$1"
  local gnu_mtime
  if gnu_mtime="$(stat -c %Y "$target" 2>/dev/null)"; then
    printf '%s\n' "$gnu_mtime"
    return
  fi
  stat -f %m "$target"
}
# Succeed when the given PID names a process that can be signalled.
# Arguments: $1 - PID read from the pid file (may be empty or garbage)
# Returns:   0 when "kill -0" succeeds, 1 otherwise
proc_alive() {
  local pid="$1"
  [[ -n "$pid" ]] || return 1
  # Digits only, and at least one of them non-zero: "kill -0 0" addresses the
  # caller's own process group and would therefore always report "alive".
  [[ "$pid" =~ ^0*[1-9][0-9]*$ ]] || return 1
  kill -0 "$pid" >/dev/null 2>&1
}
# Decide the tri-state answer and print exactly one token.
#   no pid file, or pid not alive        -> idle
#   alive, but no log file to judge by   -> running
#   alive, log older than the threshold  -> stalled
#   alive, log fresh enough              -> running
state="idle"
if [[ -f "$pid_file" ]]; then
  # Strip blanks and line endings from the pid file content.
  pid="$(tr -d ' \t\n\r' < "$pid_file" || true)"
  if proc_alive "$pid"; then
    state="running"
    if [[ -f "$log_file" ]]; then
      log_mtime="$(get_mtime_epoch "$log_file")"
      age=$(( NOW_EPOCH - log_mtime ))
      if (( age > STALL_AFTER_SECONDS )); then
        state="stalled"
      fi
    fi
  fi
fi
echo "$state"

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env bash
set -euo pipefail
# Directory holding this script (the bundle's script dir) and the skill root above it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
# Abort immediately with a message when HOME is unset or empty.
HOME_DIR="${HOME:?HOME is required}"
# Install targets; each can be preset through a WATCHDOG_B_* environment
# variable and overridden again by the command-line options parsed below.
WORKSPACE_DEFAULT="$HOME_DIR/.openclaw/workspace"
WORKSPACE="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
SYSTEMD_USER_DIR="${WATCHDOG_B_SYSTEMD_USER_DIR:-$HOME_DIR/.config/systemd/user}"
CONFIG_DIR="${WATCHDOG_B_CONFIG_DIR:-$HOME_DIR/.config/openclaw}"
LIVE_SCRIPT_DIR="${WATCHDOG_B_LIVE_SCRIPT_DIR:-$WORKSPACE/scripts/watchdog-b}"
# Flags toggled by --install-env-example and --force (0 = off, 1 = on).
INSTALL_ENV_EXAMPLE=0
FORCE=0
# Print the command-line help text to stdout.
usage() {
  printf '%s\n' \
    "Usage: $(basename "$0") [options]" \
    "Install bundled watchdog-discord-route assets into live paths." \
    "Options:" \
    "--workspace PATH Target workspace (default: $WORKSPACE_DEFAULT)" \
    "--systemd-user-dir PATH Target systemd --user unit dir (default: ~/.config/systemd/user)" \
    "--config-dir PATH Target config dir (default: ~/.config/openclaw)" \
    "--live-script-dir PATH Target live watchdog script dir (default: <workspace>/scripts/watchdog-b)" \
    "--install-env-example Also install watchdog-b.env.example to <config-dir>/watchdog-b.env.example" \
    "--force Overwrite existing files in live paths" \
    "-h, --help Show this help"
}
# Parse command-line options. Options that take a value fail with a clear
# message (exit 2) when the value is missing, instead of tripping "set -u".
#
# The live script dir is documented as defaulting to
# <workspace>/scripts/watchdog-b, so remember whether it was chosen explicitly
# (environment or --live-script-dir) and otherwise re-derive it after parsing,
# once a possible --workspace override is known.
live_script_dir_explicit=0
if [[ -n "${WATCHDOG_B_LIVE_SCRIPT_DIR:-}" ]]; then
  live_script_dir_explicit=1
fi
while [[ $# -gt 0 ]]; do
  case "$1" in
    --workspace|--systemd-user-dir|--config-dir|--live-script-dir)
      if [[ $# -lt 2 ]]; then
        echo "missing value for option: $1" >&2
        usage >&2
        exit 2
      fi
      case "$1" in
        --workspace)
          WORKSPACE="$2" ;;
        --systemd-user-dir)
          SYSTEMD_USER_DIR="$2" ;;
        --config-dir)
          CONFIG_DIR="$2" ;;
        --live-script-dir)
          LIVE_SCRIPT_DIR="$2"
          live_script_dir_explicit=1 ;;
      esac
      shift 2 ;;
    --install-env-example)
      INSTALL_ENV_EXAMPLE=1; shift ;;
    --force)
      FORCE=1; shift ;;
    -h|--help)
      usage; exit 0 ;;
    *)
      echo "unknown argument: $1" >&2
      usage >&2
      exit 2 ;;
  esac
done
if [[ "$live_script_dir_explicit" != "1" ]]; then
  LIVE_SCRIPT_DIR="$WORKSPACE/scripts/watchdog-b"
fi
# Make sure every install target directory exists before copying into it.
mkdir -p "$LIVE_SCRIPT_DIR" "$SYSTEMD_USER_DIR" "$CONFIG_DIR"
# Install a regular (non-executable, mode 0644) file.
# An existing destination is left untouched unless FORCE is "1".
# Arguments: $1 - source file, $2 - destination path
copy_file() {
  local source_path="$1" target_path="$2"
  if [[ "$FORCE" == "1" || ! -e "$target_path" ]]; then
    install -m 0644 "$source_path" "$target_path"
    echo "installed: $target_path"
  else
    echo "skip existing: $target_path"
  fi
}
# Install an executable (mode 0755) file.
# An existing destination is left untouched unless FORCE is "1".
# Arguments: $1 - source file, $2 - destination path
copy_exec() {
  local source_path="$1" target_path="$2"
  if [[ "$FORCE" == "1" || ! -e "$target_path" ]]; then
    install -m 0755 "$source_path" "$target_path"
    echo "installed: $target_path"
  else
    echo "skip existing: $target_path"
  fi
}
# Render the bundled service template into the systemd --user unit dir,
# replacing the template's default %h/... paths with the chosen install paths.
# An existing unit is left untouched unless FORCE is "1".
# Globals: SCRIPT_DIR, SYSTEMD_USER_DIR, WORKSPACE, CONFIG_DIR,
#          LIVE_SCRIPT_DIR, FORCE (all read)
render_service() {
  local src="$SCRIPT_DIR/openclaw-watchdog-b.service"
  local dest="$SYSTEMD_USER_DIR/openclaw-watchdog-b.service"
  if [[ -e "$dest" && "$FORCE" != "1" ]]; then
    echo "skip existing: $dest"
    return 0
  fi
  # Escape the characters that are special in a sed replacement:
  # backslash, "&" (whole match) and the "#" used as delimiter.
  local live_repl workspace_repl config_repl
  live_repl="$(printf '%s' "$LIVE_SCRIPT_DIR" | sed -e 's/[\\&#]/\\&/g')"
  workspace_repl="$(printf '%s' "$WORKSPACE" | sed -e 's/[\\&#]/\\&/g')"
  config_repl="$(printf '%s' "$CONFIG_DIR" | sed -e 's/[\\&#]/\\&/g')"
  # The live-script-dir pattern must run first: it starts with the workspace
  # pattern, so replacing the workspace first would leave nothing to match
  # and a custom live script dir would be ignored.
  sed \
    -e "s#%h/\\.openclaw/workspace/scripts/watchdog-b#${live_repl}#g" \
    -e "s#%h/\\.openclaw/workspace#${workspace_repl}#g" \
    -e "s#%h/\\.config/openclaw#${config_repl}#g" \
    "$src" > "$dest"
  chmod 0644 "$dest"
  echo "installed: $dest"
}
# Executable helpers go into the live script dir with mode 0755.
for exec_name in \
  check_openclaw_state.sh \
  run_watchdog_b.sh \
  verify_watchdog_b_e2e.sh \
  notify_watchdog_b.py \
  openclaw_runtime_probe.py; do
  copy_exec "$SCRIPT_DIR/$exec_name" "$LIVE_SCRIPT_DIR/$exec_name"
done
# The owner-report modules are installed as plain (0644) files.
for module_name in \
  owner_report_consumer.py \
  owner_report_driver.py \
  owner_report_producer.py; do
  copy_file "$SCRIPT_DIR/$module_name" "$LIVE_SCRIPT_DIR/$module_name"
done
# systemd --user units: the timer is copied verbatim, the service is rendered.
copy_file "$SCRIPT_DIR/openclaw-watchdog-b.timer" "$SYSTEMD_USER_DIR/openclaw-watchdog-b.timer"
render_service
if [[ "$INSTALL_ENV_EXAMPLE" == "1" ]]; then
  copy_file "$SCRIPT_DIR/watchdog-b.env.example" "$CONFIG_DIR/watchdog-b.env.example"
fi
# Closing summary plus the manual steps the operator still has to perform.
cat <<EOF
Install summary
- skill_dir: $SKILL_DIR
- workspace: $WORKSPACE
- live_script_dir: $LIVE_SCRIPT_DIR
- systemd_user_dir: $SYSTEMD_USER_DIR
- config_dir: $CONFIG_DIR
Operator install order
1. Install bundle files:
./scripts/install_watchdog_bundle.sh --install-env-example
2. Create live env if missing:
mkdir -p "$CONFIG_DIR"
cp "$CONFIG_DIR/watchdog-b.env.example" "$CONFIG_DIR/watchdog-b.env"
3. Edit live env and set at least:
WATCHDOG_B_OWNER_REPORT_TARGET=channel:YOUR_DISCORD_CHANNEL_ID
4. Run bootstrap:
./scripts/bootstrap_watchdog_bundle.sh
5. Only after bootstrap passes:
systemctl --user daemon-reload
systemctl --user enable --now openclaw-watchdog-b.timer
Notes
- If $CONFIG_DIR/watchdog-b.env does not exist, bootstrap will warn/fail until you create it.
- The env example is intentionally installed as watchdog-b.env.example first; copy it to watchdog-b.env after editing.
EOF

467
scripts/notify_watchdog_b.py Executable file
View File

@@ -0,0 +1,467 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Directory containing this script, and the skill root one level above it.
SCRIPT_DIR = Path(__file__).resolve().parent
SKILL_DIR = SCRIPT_DIR.parent
# These three are read from the process environment BEFORE the env file is
# loaded, so values for them inside the env file do not affect them.
WORKSPACE = Path(os.environ.get("WATCHDOG_B_WORKSPACE", str(Path.home() / ".openclaw" / "workspace")))
CONFIG_FILE = Path(os.environ.get("WATCHDOG_B_CONFIG_FILE", str(Path.home() / ".config" / "openclaw" / "watchdog-b.env")))
LIVE_SCRIPT_DIR = Path(os.environ.get("WATCHDOG_B_LIVE_SCRIPT_DIR", str(WORKSPACE / "scripts" / "watchdog-b")))
def load_env_file(path: Path) -> None:
    """Merge KEY=VALUE lines from ``path`` into ``os.environ``.

    A missing file is ignored. Blank lines, lines starting with ``#``, lines
    without ``=`` and lines with an empty key are skipped. One pair of
    matching surrounding quotes is removed from the value. Variables already
    present in the environment are never overwritten.
    """
    if not path.exists():
        return
    content = path.read_text(encoding="utf-8")
    for entry in map(str.strip, content.splitlines()):
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        name, _, raw = entry.partition("=")
        name = name.strip()
        if not name:
            continue
        raw = raw.strip()
        for quote in ('"', "'"):
            if raw.startswith(quote) and raw.endswith(quote):
                raw = raw[1:-1]
                break
        os.environ.setdefault(name, raw)
# Merge the env file into os.environ first, so every setting below can come
# from either the real environment (which wins) or the file.
load_env_file(CONFIG_FILE)
# Where the notification state is persisted between runs.
STATE_DIR = Path(os.environ.get("WATCHDOG_B_ARTIFACT_DIR", str(WORKSPACE / "state" / "watchdog-b")))
NOTIFY_STATE_PATH = STATE_DIR / "notify-state.json"
# Owner-report helper scripts; by default the copies next to this script.
OWNER_PRODUCER = Path(os.environ.get("WATCHDOG_B_OWNER_PRODUCER", str(SCRIPT_DIR / "owner_report_producer.py")))
OWNER_DRIVER = Path(os.environ.get("WATCHDOG_B_OWNER_DRIVER", str(SCRIPT_DIR / "owner_report_driver.py")))
# Interpreter used to launch the helper scripts and the runtime probe.
PYTHON_BIN = os.environ.get("WATCHDOG_B_PYTHON_BIN", sys.executable or "python3")
# Delivery channel and target passed to "message send" for owner reports.
WATCHDOG_OWNER_REPORT_CHANNEL = os.environ.get("WATCHDOG_B_OWNER_REPORT_CHANNEL", "discord")
WATCHDOG_OWNER_REPORT_TARGET = os.environ.get("WATCHDOG_B_OWNER_REPORT_TARGET", "channel:REPLACE_ME")
# Agent to nudge on stalled/idle; when empty the nudge is skipped.
WATCHDOG_MAIN_AGENT_ID = os.environ.get("WATCHDOG_B_MAIN_AGENT_ID", "").strip()
HOSTNAME = os.uname().nodename
UTC = timezone.utc
RUNTIME_PROBE = Path(os.environ.get("WATCHDOG_B_RUNTIME_PROBE", str(SCRIPT_DIR / "openclaw_runtime_probe.py")))
# Filled by detect_runtime_paths() on first successful resolution.
RUNTIME_CACHE: dict[str, Path] | None = None
# Fallbacks used when the matching WATCHDOG_B_* variable is not set:
# minimum seconds between sends, and sightings required before owner escalation.
DEFAULTS = {
    "running_min_interval_seconds": 3600,
    "stalled_nudge_min_interval_seconds": 900,
    "idle_nudge_min_interval_seconds": 1800,
    "stalled_owner_escalation_after": 2,
    "idle_owner_escalation_after": 2,
}
def now_iso() -> str:
    """Return the current local time as a timezone-aware ISO-8601 string with second precision."""
    local_now = datetime.now().astimezone()
    return local_now.isoformat(timespec="seconds")
def path_or_none(value: str | None) -> Path | None:
if not value:
return None
return Path(value).expanduser()
def detect_runtime_paths() -> dict[str, Path]:
    """Resolve the node binary and the two openclaw files, caching the result.

    Resolution order:
      1. WATCHDOG_B_NODE_BIN / WATCHDOG_B_OPENCLAW_MJS / WATCHDOG_B_OPENCLAW_ENTRY,
         when all three point at usable files;
      2. the JSON printed by the runtime probe script, when it exits 0 and
         reports all three paths;
      3. the environment values again, with ``node`` looked up on PATH when the
         configured one is unusable.

    Returns:
        Mapping with the keys ``node``, ``openclaw_mjs`` and ``openclaw_entry``.

    Raises:
        RuntimeError: when at least one path cannot be resolved; the message
            names the environment variables that would supply it.
    """
    global RUNTIME_CACHE
    if RUNTIME_CACHE is not None:
        return RUNTIME_CACHE

    def _usable_node(candidate: Path | None) -> bool:
        return candidate is not None and candidate.exists() and os.access(candidate, os.X_OK)

    def _usable_file(candidate: Path | None) -> bool:
        return candidate is not None and candidate.is_file()

    node_bin = path_or_none(os.environ.get("WATCHDOG_B_NODE_BIN"))
    openclaw_mjs = path_or_none(os.environ.get("WATCHDOG_B_OPENCLAW_MJS"))
    openclaw_entry = path_or_none(os.environ.get("WATCHDOG_B_OPENCLAW_ENTRY"))
    if _usable_node(node_bin) and _usable_file(openclaw_mjs) and _usable_file(openclaw_entry):
        RUNTIME_CACHE = {
            "node": node_bin,
            "openclaw_mjs": openclaw_mjs,
            "openclaw_entry": openclaw_entry,
        }
        return RUNTIME_CACHE

    if RUNTIME_PROBE.exists():
        proc = subprocess.run([PYTHON_BIN, str(RUNTIME_PROBE)], text=True, capture_output=True)
        if proc.returncode == 0:
            # Malformed or incomplete probe output must not crash the notifier;
            # fall through to the PATH-based fallback instead.
            try:
                detected = json.loads(proc.stdout).get("detected", {})
                probed = {
                    key: Path(detected[key])
                    for key in ("node", "openclaw_mjs", "openclaw_entry")
                }
            except (ValueError, KeyError, TypeError, AttributeError):
                probed = None
            if probed is not None:
                RUNTIME_CACHE = probed
                return RUNTIME_CACHE

    # Keep a usable configured node; only otherwise fall back to PATH.
    if not _usable_node(node_bin):
        node_which = shutil.which("node")
        if node_which:
            node_bin = Path(node_which)

    missing = []
    if not _usable_node(node_bin):
        missing.append("WATCHDOG_B_NODE_BIN")
    if not _usable_file(openclaw_mjs):
        missing.append("WATCHDOG_B_OPENCLAW_MJS")
    if not _usable_file(openclaw_entry):
        missing.append("WATCHDOG_B_OPENCLAW_ENTRY")
    if not missing:
        # Everything resolved after the PATH lookup: return it rather than
        # raising with an empty "Missing:" list.
        RUNTIME_CACHE = {
            "node": node_bin,
            "openclaw_mjs": openclaw_mjs,
            "openclaw_entry": openclaw_entry,
        }
        return RUNTIME_CACHE
    raise RuntimeError(
        "Unable to auto-detect watchdog runtime paths. Missing: " + ", ".join(missing)
    )
def load_state() -> dict[str, Any]:
    """Load the persisted notification state.

    Returns the parsed JSON object from ``NOTIFY_STATE_PATH``. When the file is
    missing, unreadable, not valid JSON, or not shaped like
    ``{"events": {...}}``, a fresh ``{"events": {}}`` is returned; the reason
    is reported on stderr so that a reset of the throttling state is visible.
    """
    if NOTIFY_STATE_PATH.exists():
        try:
            loaded = json.loads(NOTIFY_STATE_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSON and text-decoding errors.
            print(f"watchdog-b: ignoring unreadable notify state {NOTIFY_STATE_PATH}: {exc}", file=sys.stderr)
        else:
            # Callers use dict methods on the result and on its "events" entry.
            if isinstance(loaded, dict) and isinstance(loaded.get("events", {}), dict):
                return loaded
            print(f"watchdog-b: ignoring malformed notify state {NOTIFY_STATE_PATH}", file=sys.stderr)
    return {"events": {}}
def save_state(data: dict[str, Any]) -> None:
    """Write ``data`` as indented UTF-8 JSON to ``NOTIFY_STATE_PATH``, creating ``STATE_DIR`` if needed."""
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    NOTIFY_STATE_PATH.write_text(f"{serialized}\n", encoding="utf-8")
def event_bucket(state: str) -> dict[str, Any]:
    """Load the persisted state, make sure an entry for ``state`` exists, and return the whole state.

    NOTE(review): despite the name, this returns the full state dict, not the
    per-state bucket - confirm that callers (none visible here) expect that.
    """
    data = load_state()
    data.setdefault("events", {}).setdefault(state, {})
    return data
def get_bucket(data: dict[str, Any], state: str) -> dict[str, Any]:
    """Return the mutable per-state entry under ``data["events"]``, creating missing levels in place."""
    return data.setdefault("events", {}).setdefault(state, {})
def should_send(bucket: dict[str, Any], min_interval_seconds: int, timestamp: datetime) -> tuple[bool, str]:
    """Decide whether enough time has passed since the bucket's last send.

    Args:
        bucket: per-state record; ``last_sent_at`` (ISO-8601 string) is read.
        min_interval_seconds: minimum number of seconds between two sends.
        timestamp: the moment being evaluated.

    Returns:
        ``(allowed, reason)``. ``reason`` is ``first-send``,
        ``state-corrupt-reset``, ``interval-ok:<n>s`` or
        ``throttled:<n>s<<min>s``.
    """
    last_sent = bucket.get("last_sent_at")
    if not last_sent:
        return True, "first-send"
    try:
        prev = datetime.fromisoformat(last_sent)
    except (TypeError, ValueError):
        return True, "state-corrupt-reset"
    # Subtracting a naive from an aware datetime raises TypeError. When only
    # one side carries a UTC offset, read the naive one as local time, which is
    # how now_iso() produces timestamps.
    if (prev.tzinfo is None) != (timestamp.tzinfo is None):
        if prev.tzinfo is None:
            prev = prev.astimezone()
        else:
            timestamp = timestamp.astimezone()
    elapsed = (timestamp - prev).total_seconds()
    if elapsed >= min_interval_seconds:
        return True, f"interval-ok:{int(elapsed)}s"
    return False, f"throttled:{int(elapsed)}s<{min_interval_seconds}s"
def mark_sent(bucket: dict[str, Any], channel: str, timestamp: str, detail: dict[str, Any] | None = None) -> None:
bucket["last_sent_at"] = timestamp
bucket["last_channel"] = channel
bucket["send_count"] = int(bucket.get("send_count", 0)) + 1
bucket["last_detail"] = detail or {}
def build_owner_message(state: str, timestamp: str, detail: str) -> dict[str, str]:
    """Assemble the owner-report fields for ``state``.

    Every field has a built-in default per state and can be overridden through
    ``WATCHDOG_B_<STATE>_<FIELD>`` environment variables. A non-empty
    ``detail`` argument takes precedence for the ``next`` field.

    Raises:
        KeyError: when ``state`` is not running, stalled or idle.
    """
    checked = f"checked_at={timestamp} host={HOSTNAME}"
    table = {
        "running": {
            "emoji": "",
            "summary": "主程序仍在運行",
            "progress": "running",
            "status": "normal",
            "source": "watchdog-b-running",
            "detail": checked,
        },
        "stalled": {
            "emoji": "⚠️",
            "summary": "主程序疑似卡住",
            "progress": "stalled",
            "status": "needs-attention",
            "source": "watchdog-b-stalled-escalation",
            "detail": f"{checked}; stale activity detected while process still looked alive",
        },
        "idle": {
            "emoji": "🛑",
            "summary": "主程序目前未運行",
            "progress": "idle",
            "status": "needs-attention",
            "source": "watchdog-b-idle-escalation",
            "detail": f"{checked}; no active main runtime detected",
        },
    }
    fallback = table[state]
    env_prefix = f"WATCHDOG_B_{state.upper()}_"

    def setting(suffix: str, default: str) -> str:
        # Environment override for one field, else the built-in default.
        return os.environ.get(env_prefix + suffix, default)

    emoji = setting("EMOJI", fallback["emoji"])
    summary = setting("SUMMARY", fallback["summary"])
    return {
        "progress": setting("PROGRESS_LABEL", fallback["progress"]),
        "done": f"{emoji} {summary}",
        "next": detail or setting("DETAIL", fallback["detail"]),
        "status": setting("STATUS", fallback["status"]),
        "source": setting("SOURCE", fallback["source"]),
    }
def enqueue_owner_report(*, state: str, timestamp: str, dry_run: bool, detail: str) -> dict[str, Any]:
    """Run the owner-report producer script to queue one report.

    The report id is ``watchdog-b-<state>-<UTC timestamp>``. The producer is
    started with ``PYTHON_BIN`` and receives the message fields as command-line
    options; ``--dry-run`` is appended when requested.

    Returns:
        A result record with the command, exit code and captured output. On a
        real (non dry-run) success it also carries ``pending_path``.
    """
    fields = build_owner_message(state, timestamp, detail)
    report_id = f"watchdog-b-{state}-{datetime.now(UTC).strftime('%Y%m%dT%H%M%SZ')}"
    options = [
        ("--team", "watchdog-b"),
        ("--worker", HOSTNAME),
        ("--task-id", f"openclaw-main-{state}"),
        ("--progress", fields["progress"]),
        ("--done", fields["done"]),
        ("--next", fields["next"]),
        ("--status", fields["status"]),
        ("--source", fields["source"]),
        ("--report-id", report_id),
    ]
    cmd = [PYTHON_BIN, str(OWNER_PRODUCER)]
    for flag, value in options:
        cmd += [flag, value]
    if dry_run:
        cmd.append("--dry-run")
    completed = subprocess.run(cmd, text=True, capture_output=True)
    succeeded = completed.returncode == 0
    result = {
        "kind": "owner-report-enqueue",
        "ok": succeeded,
        "command": cmd,
        "exit_code": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "report_id": report_id,
        "dry_run": dry_run,
    }
    if succeeded and not dry_run:
        pending_dir = Path.home() / ".clawteam" / "owner-reports" / "pending"
        result["pending_path"] = str(pending_dir / f"{report_id}.md")
    return result
def build_owner_send_cmd() -> str:
    """Build the shell command line handed to the owner-report driver via ``--send-cmd``.

    Paths, channel and target are shell-quoted with ``shlex.quote`` so that
    spaces or quote characters in them cannot break or alter the command.
    ``"$OWNER_REPORT_MESSAGE"`` is deliberately left unexpanded.
    NOTE(review): presumably the driver runs this through a shell with
    OWNER_REPORT_MESSAGE set - the driver is not visible here; confirm.

    Raises:
        RuntimeError: propagated from detect_runtime_paths() when the runtime
            cannot be resolved.
    """
    import shlex

    runtime = detect_runtime_paths()
    parts = [
        shlex.quote(str(runtime["node"])),
        shlex.quote(str(runtime["openclaw_entry"])),
        "message",
        "send",
        "--channel",
        shlex.quote(WATCHDOG_OWNER_REPORT_CHANNEL),
        "--target",
        shlex.quote(WATCHDOG_OWNER_REPORT_TARGET),
        '--message "$OWNER_REPORT_MESSAGE"',
    ]
    return " ".join(parts)
def deliver_owner_report(*, report_id: str, dry_run: bool) -> dict[str, Any]:
    """Run the owner-report driver script for one report, passing it the send command.

    Returns:
        A result record with the driver command, the send command, exit code,
        captured output and the configured channel/target.
    """
    send_cmd = build_owner_send_cmd()
    driver_cmd = [PYTHON_BIN, str(OWNER_DRIVER), report_id, "--send-cmd", send_cmd]
    if dry_run:
        driver_cmd += ["--dry-run"]
    completed = subprocess.run(driver_cmd, text=True, capture_output=True)
    outcome: dict[str, Any] = {
        "kind": "owner-report-direct-delivery",
        "ok": completed.returncode == 0,
        "command": driver_cmd,
        "send_cmd": send_cmd,
        "exit_code": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "dry_run": dry_run,
        "report_id": report_id,
        "target_channel": WATCHDOG_OWNER_REPORT_CHANNEL,
        "target": WATCHDOG_OWNER_REPORT_TARGET,
    }
    return outcome
def call_main_agent(*, state: str, timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Send a status-check nudge to the configured main agent.

    The nudge is skipped (reported as ok) when WATCHDOG_B_MAIN_AGENT_ID is not
    configured. In dry-run mode the command is built but not executed. The
    agent timeout comes from WATCHDOG_B_MAIN_AGENT_TIMEOUT (default "120");
    the subprocess is given ten extra seconds on top of it.

    Returns:
        A result record of kind ``main-agent-nudge`` describing what happened.
    """
    kind = "main-agent-nudge"
    message = (
        f"[watchdog-b][{state}] {timestamp}\n"
        f"Host: {HOSTNAME}\n"
        "Please confirm current task state, whether progress is blocked, and whether owner-facing escalation is needed."
    )

    if not WATCHDOG_MAIN_AGENT_ID:
        return {
            "kind": kind,
            "ok": True,
            "skipped": True,
            "reason": "WATCHDOG_B_MAIN_AGENT_ID not configured",
            "dry_run": dry_run,
            "message": message,
        }

    try:
        runtime = detect_runtime_paths()
    except Exception as exc:
        return {
            "kind": kind,
            "ok": False,
            "dry_run": dry_run,
            "error": str(exc),
            "message": message,
        }

    timeout_setting = os.environ.get("WATCHDOG_B_MAIN_AGENT_TIMEOUT", "120")
    cmd = [
        str(runtime["node"]),
        str(runtime["openclaw_mjs"]),
        "agent",
        "--agent",
        WATCHDOG_MAIN_AGENT_ID,
        "--message",
        message,
        "--timeout",
        timeout_setting,
    ]
    if dry_run:
        return {"kind": kind, "ok": True, "dry_run": True, "command": cmd, "message": message}

    try:
        # The numeric conversion stays inside the try block on purpose: a
        # non-numeric setting surfaces only on a real (non dry-run) call.
        completed = subprocess.run(cmd, text=True, capture_output=True, timeout=int(timeout_setting) + 10)
    except subprocess.TimeoutExpired as expired:
        return {
            "kind": kind,
            "ok": False,
            "dry_run": False,
            "command": cmd,
            "timeout": True,
            "stdout": expired.stdout,
            "stderr": expired.stderr,
            "message": message,
        }
    return {
        "kind": kind,
        "ok": completed.returncode == 0,
        "dry_run": False,
        "command": cmd,
        "exit_code": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "message": message,
    }
def maybe_running_report(data: dict[str, Any], bucket: dict[str, Any], timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Handle the ``running`` state according to WATCHDOG_B_RUNNING_REPORT_MODE.

    Modes: ``manual`` (default, nothing is sent), ``enqueue`` (queue an owner
    report) and ``enqueue-and-drain`` (queue it and deliver it directly).
    Sends are throttled by WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS.

    Returns:
        A result record; ``ok`` is False for an unsupported mode or when an
        attempted enqueue/delivery failed.
    """
    mode = os.environ.get("WATCHDOG_B_RUNNING_REPORT_MODE", "manual").lower()
    interval_setting = os.environ.get(
        "WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS",
        str(DEFAULTS["running_min_interval_seconds"]),
    )
    min_interval = int(interval_setting)
    allowed, reason = should_send(bucket, min_interval, datetime.fromisoformat(timestamp))
    result: dict[str, Any] = {
        "state": "running",
        "route": "owner-report",
        "mode": mode,
        "allowed": allowed,
        "reason": reason,
        "dry_run": dry_run,
    }

    if mode == "manual":
        result["ok"] = True
        result["action"] = "manual-only"
        result["hint"] = "set WATCHDOG_B_RUNNING_REPORT_MODE=enqueue to create a real pending item, or enqueue-and-drain to enqueue and directly deliver it to Discord"
        return result
    if mode not in ("enqueue", "enqueue-and-drain"):
        result["ok"] = False
        result["error"] = f"unsupported running mode: {mode}"
        return result
    if not allowed:
        result["ok"] = True
        result["action"] = "suppressed"
        return result

    enqueue = enqueue_owner_report(state="running", timestamp=timestamp, dry_run=dry_run, detail="Main runtime alive and log activity fresh.")
    enqueued = enqueue.get("ok")
    result["enqueue"] = enqueue
    result["ok"] = enqueue.get("ok", False)
    if enqueued:
        mark_sent(bucket, "owner-report-enqueue", timestamp, {"report_id": enqueue.get("report_id")})
    if enqueued and mode == "enqueue-and-drain":
        deliver = deliver_owner_report(report_id=enqueue.get("report_id"), dry_run=dry_run)
        result["deliver"] = deliver
        result["ok"] = result["ok"] and deliver.get("ok", False)
        if deliver.get("ok"):
            mark_sent(bucket, "owner-report-direct-delivery", timestamp, {"report_id": enqueue.get("report_id")})
    return result
def maybe_nudge_and_escalate(data: dict[str, Any], bucket: dict[str, Any], *, state: str, timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Handle a ``stalled`` or ``idle`` sighting: nudge the main agent, then escalate to the owner.

    Each call increments the bucket's ``seen_count``. The main agent is nudged
    when the per-state minimum interval has elapsed. Once ``seen_count``
    reaches the escalation threshold and the owner mode is ``always`` or
    ``escalate``, an owner report is queued, and delivered directly when
    WATCHDOG_B_OWNER_DELIVERY_MODE is ``direct-discord``.

    The owner gate reuses the throttle decision taken before the nudge. A
    successful nudge stamps ``last_sent_at`` with the current timestamp, so
    re-evaluating the gate afterwards would always report "throttled" and the
    escalation could only fire when the nudge itself had failed.

    Settings are read from WATCHDOG_B_STALLED_* or WATCHDOG_B_IDLE_*
    (``..._NUDGE_MIN_INTERVAL_SECONDS``, ``..._OWNER_ESCALATION_AFTER``,
    ``..._OWNER_MODE``).

    Returns:
        A result record; ``ok`` is False when an attempted nudge, enqueue or
        delivery failed.
    """
    is_stalled = state == "stalled"
    nudge_min = int(os.environ.get(
        "WATCHDOG_B_STALLED_NUDGE_MIN_INTERVAL_SECONDS" if is_stalled else "WATCHDOG_B_IDLE_NUDGE_MIN_INTERVAL_SECONDS",
        str(DEFAULTS["stalled_nudge_min_interval_seconds"] if is_stalled else DEFAULTS["idle_nudge_min_interval_seconds"]),
    ))
    escalation_after = int(os.environ.get(
        "WATCHDOG_B_STALLED_OWNER_ESCALATION_AFTER" if is_stalled else "WATCHDOG_B_IDLE_OWNER_ESCALATION_AFTER",
        str(DEFAULTS["stalled_owner_escalation_after"] if is_stalled else DEFAULTS["idle_owner_escalation_after"]),
    ))
    owner_mode = os.environ.get(
        "WATCHDOG_B_STALLED_OWNER_MODE" if is_stalled else "WATCHDOG_B_IDLE_OWNER_MODE",
        "escalate",
    ).lower()
    bucket["seen_count"] = int(bucket.get("seen_count", 0)) + 1
    # Throttle decision for this cycle, taken before anything is marked as sent.
    allowed, reason = should_send(bucket, nudge_min, datetime.fromisoformat(timestamp))
    result: dict[str, Any] = {
        "state": state,
        "route": "main-agent-then-owner",
        "allowed": allowed,
        "reason": reason,
        "seen_count": bucket["seen_count"],
        "owner_mode": owner_mode,
        "dry_run": dry_run,
    }
    if allowed:
        nudge = call_main_agent(state=state, timestamp=timestamp, dry_run=dry_run)
        result["main_agent_nudge"] = nudge
        if nudge.get("ok"):
            mark_sent(bucket, "main-agent", timestamp, {"state": state})
        result["ok"] = nudge.get("ok", False)
    else:
        result.update({"ok": True, "action": "nudge-suppressed"})
    # Any other owner mode (for example "never") disables escalation.
    should_escalate = owner_mode in {"always", "escalate"} and bucket["seen_count"] >= escalation_after
    if should_escalate:
        owner_allowed, owner_reason = allowed, reason
        result["owner_escalation_gate"] = {"allowed": owner_allowed, "reason": owner_reason, "threshold": escalation_after}
        if owner_allowed:
            detail = "Main agent was nudged repeatedly; please review whether manual intervention is needed."
            enqueue = enqueue_owner_report(state=state, timestamp=timestamp, dry_run=dry_run, detail=detail)
            result["owner_enqueue"] = enqueue
            result["ok"] = result.get("ok", True) and enqueue.get("ok", False)
            if enqueue.get("ok"):
                mark_sent(bucket, "owner-report-enqueue", timestamp, {"report_id": enqueue.get("report_id"), "state": state})
                owner_delivery_mode = os.environ.get(
                    "WATCHDOG_B_OWNER_DELIVERY_MODE",
                    "enqueue-only",
                ).lower()
                result["owner_delivery_mode"] = owner_delivery_mode
                if owner_delivery_mode == "direct-discord":
                    deliver = deliver_owner_report(report_id=enqueue.get("report_id"), dry_run=dry_run)
                    result["owner_deliver"] = deliver
                    result["ok"] = result.get("ok", True) and deliver.get("ok", False)
                    if deliver.get("ok"):
                        mark_sent(bucket, "owner-report-direct-delivery", timestamp, {"report_id": enqueue.get("report_id"), "state": state})
    return result
def main() -> int:
    """Command-line entry point: route one observed state through the notification layer.

    Loads the persisted state, dispatches on ``--state``, stores the outcome
    in the state file, prints it as JSON and returns 0 when the outcome is ok,
    otherwise 1.
    """
    parser = argparse.ArgumentParser(description="Notification layer for watchdog-b")
    parser.add_argument("--state", required=True, choices=["running", "stalled", "idle"])
    parser.add_argument("--timestamp", default=now_iso())
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()

    data = load_state()
    bucket = get_bucket(data, opts.state)
    if opts.state != "running":
        result = maybe_nudge_and_escalate(data, bucket, state=opts.state, timestamp=opts.timestamp, dry_run=opts.dry_run)
    else:
        result = maybe_running_report(data, bucket, opts.timestamp, opts.dry_run)

    # Persist what was seen and decided, then echo it for the caller.
    bucket["last_seen_at"] = opts.timestamp
    bucket["last_result"] = result
    save_state(data)
    print(json.dumps(result, ensure_ascii=False, indent=2))
    if result.get("ok", False):
        return 0
    return 1


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,17 @@
# Template systemd --user unit for Watchdog B.
# Install to: ~/.config/systemd/user/openclaw-watchdog-b.service
# Optional env file: ~/.config/openclaw/watchdog-b.env
[Unit]
Description=OpenClaw Watchdog B (verified direct Discord owner-facing path)
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
WorkingDirectory=%h/.openclaw/workspace
Environment=WATCHDOG_B_CONFIG_FILE=%h/.config/openclaw/watchdog-b.env
EnvironmentFile=-%h/.config/openclaw/watchdog-b.env
ExecStart=%h/.openclaw/workspace/scripts/watchdog-b/run_watchdog_b.sh
StandardOutput=journal
StandardError=journal

View File

@@ -0,0 +1,15 @@
# Template systemd --user timer. The bundle installer only copies this file;
# enabling it is a separate manual step (DO NOT auto-enable).
# Runs every 10 minutes.
[Unit]
Description=Run OpenClaw Watchdog B every 10 minutes
[Timer]
OnCalendar=*:0/10
Persistent=true
# Optional jitter to avoid synchronized runs
RandomizedDelaySec=30
Unit=openclaw-watchdog-b.service
[Install]
WantedBy=timers.target

View File

@@ -0,0 +1,200 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import os
import shutil
from pathlib import Path
from typing import Iterable
# Home directory of the invoking user; base for the per-user install locations scanned below.
HOME = Path.home()
# Environment variables that can pin each runtime path explicitly,
# keyed by the name used in the detection result.
ENV_KEYS = {
    "node": "WATCHDOG_B_NODE_BIN",
    "openclaw_mjs": "WATCHDOG_B_OPENCLAW_MJS",
    "openclaw_entry": "WATCHDOG_B_OPENCLAW_ENTRY",
}
def dedupe(items: Iterable[Path]) -> list[Path]:
    """Return ``items`` without duplicates, keeping the first occurrence and the original order.

    Two items count as duplicates when their string forms are equal.
    """
    first_by_text: dict[str, Path] = {}
    for entry in items:
        first_by_text.setdefault(str(entry), entry)
    return list(first_by_text.values())
def path_candidates() -> tuple[Path | None, list[Path], list[Path]]:
    """Derive candidates from whatever ``node`` and ``openclaw`` resolve to on PATH.

    Returns:
        ``(node, roots, entries)``: the resolved node binary (or ``None``),
        de-duplicated candidate openclaw package directories, and
        de-duplicated candidate ``dist/entry.js`` files.
    """
    node_hit = shutil.which("node")
    openclaw_hit = shutil.which("openclaw")
    node_candidate = Path(node_hit).resolve() if node_hit else None

    package_suffix = Path("lib") / "node_modules" / "openclaw"
    roots: list[Path] = []
    entry_candidates: list[Path] = []
    if openclaw_hit:
        # Two levels above the resolved openclaw path, plus one level higher.
        prefix = Path(openclaw_hit).resolve().parent.parent
        roots += [prefix / package_suffix, prefix.parent / package_suffix]
        entry_candidates.append(prefix / package_suffix / "dist" / "entry.js")
    if node_candidate:
        roots.append(node_candidate.parent.parent / package_suffix)
    return node_candidate, dedupe(roots), dedupe(entry_candidates)
def common_roots() -> list[Path]:
    """List the well-known directories that are scanned for an openclaw install.

    Covers nvm (default location and ``$NVM_DIR``), pnpm global, npm-global,
    ``/usr/local``, ``/usr`` and volta. Duplicates are removed; existence is
    not checked here.
    """
    nvm_dir = Path(os.environ.get("NVM_DIR", HOME / ".nvm")).expanduser()
    locations = [
        HOME / ".nvm" / "versions" / "node",
        nvm_dir / "versions" / "node",
        HOME / ".local" / "share" / "pnpm" / "global",
        HOME / ".npm-global",
        Path("/usr/local"),
        Path("/usr"),
        HOME / ".volta" / "tools" / "image",
    ]
    return dedupe(locations)
def scan_openclaw_install_roots() -> list[Path]:
    """Glob the common roots for openclaw package directories.

    Roots named ``node`` are searched for ``v*/lib/node_modules/openclaw``
    only; every other root is searched with four generic patterns. Matches of
    each pattern are taken in reverse-sorted order. Non-existent roots are
    skipped and duplicates removed.
    """
    versioned_patterns = ("v*/lib/node_modules/openclaw",)
    generic_patterns = (
        "lib/node_modules/openclaw",
        "node_modules/openclaw",
        "*/lib/node_modules/openclaw",
        "*/node_modules/openclaw",
    )
    found: list[Path] = []
    for base in common_roots():
        if not base.exists():
            continue
        active_patterns = versioned_patterns if base.name == "node" else generic_patterns
        for pattern in active_patterns:
            found.extend(sorted(base.glob(pattern), reverse=True))
    return dedupe(found)
def valid_node(path: Path | None) -> Path | None:
if path and path.exists() and os.access(path, os.X_OK):
return path
return None
def valid_file(path: Path | None) -> Path | None:
if path and path.is_file():
return path
return None
def detect_runtime() -> dict[str, object]:
    """Detect the node binary, ``openclaw.mjs`` and ``dist/entry.js``.

    Sources are tried in this order, and a value found earlier is never
    replaced by a later source:
      1. the environment variables named in ``ENV_KEYS``;
      2. ``node`` / ``openclaw`` as found on PATH;
      3. a scan of common install locations.

    Returns:
        A dict with ``ok`` (all three found), ``detected`` (name -> path
        string), ``sources`` (name -> where it came from) and ``searched``
        (everything that was looked at). When ``ok`` is False it also carries
        ``missing`` and a human-readable ``error``.
    """
    result: dict[str, object] = {"ok": False, "detected": {}, "sources": {}, "searched": {}}
    detected: dict[str, str] = {}
    sources: dict[str, str] = {}
    searched: dict[str, list[str]] = {"node": [], "openclaw": []}
    # 1. Explicit environment overrides; each is accepted only if it validates.
    env_node = os.environ.get(ENV_KEYS["node"])
    if env_node:
        searched["node"].append(env_node)
        node = valid_node(Path(env_node).expanduser())
        if node:
            detected["node"] = str(node)
            sources["node"] = f"env:{ENV_KEYS['node']}"
    env_mjs = os.environ.get(ENV_KEYS["openclaw_mjs"])
    if env_mjs:
        searched["openclaw"].append(env_mjs)
        mjs = valid_file(Path(env_mjs).expanduser())
        if mjs:
            detected["openclaw_mjs"] = str(mjs)
            sources["openclaw_mjs"] = f"env:{ENV_KEYS['openclaw_mjs']}"
    env_entry = os.environ.get(ENV_KEYS["openclaw_entry"])
    if env_entry:
        searched["openclaw"].append(env_entry)
        entry = valid_file(Path(env_entry).expanduser())
        if entry:
            detected["openclaw_entry"] = str(entry)
            sources["openclaw_entry"] = f"env:{ENV_KEYS['openclaw_entry']}"
    # 2. Whatever is on PATH.
    path_node, path_roots, path_entry_candidates = path_candidates()
    if "node" not in detected and path_node:
        searched["node"].append(str(path_node))
        node = valid_node(path_node)
        if node:
            detected["node"] = str(node)
            sources["node"] = "path:node"
    # 3. PATH-derived candidates first, then the scan of common locations.
    install_roots = dedupe(path_roots + path_entry_candidates + scan_openclaw_install_roots())
    searched["openclaw"].extend(str(p) for p in install_roots)

    def fill_from_root(root: Path, source: str) -> None:
        """Fill still-missing openclaw paths from one candidate (package directory or file)."""
        if root.is_file():
            # A file candidate is used directly when it is one of the two
            # wanted files; the package directory is then derived from it so
            # that the sibling file can be looked up below.
            candidate_entry = valid_file(root)
            if candidate_entry and candidate_entry.name == "entry.js" and "openclaw_entry" not in detected:
                detected["openclaw_entry"] = str(candidate_entry)
                sources["openclaw_entry"] = source
                root = candidate_entry.parent.parent
            elif candidate_entry and candidate_entry.name == "openclaw.mjs" and "openclaw_mjs" not in detected:
                detected["openclaw_mjs"] = str(candidate_entry)
                sources["openclaw_mjs"] = source
                root = candidate_entry.parent
            else:
                return
        candidate_mjs = valid_file(root / "openclaw.mjs")
        candidate_entry = valid_file(root / "dist" / "entry.js")
        if candidate_mjs and "openclaw_mjs" not in detected:
            detected["openclaw_mjs"] = str(candidate_mjs)
            sources["openclaw_mjs"] = source
        if candidate_entry and "openclaw_entry" not in detected:
            detected["openclaw_entry"] = str(candidate_entry)
            sources["openclaw_entry"] = source

    for root in install_roots:
        source = "path:openclaw" if root in path_roots or root in path_entry_candidates else "scan:common-locations"
        fill_from_root(root, source)
        # Stop scanning as soon as both openclaw files are known.
        if all(k in detected for k in ("openclaw_mjs", "openclaw_entry")):
            break
    result["detected"] = detected
    result["sources"] = sources
    result["searched"] = searched
    result["ok"] = all(k in detected for k in ("node", "openclaw_mjs", "openclaw_entry"))
    if not result["ok"]:
        missing = [k for k in ("node", "openclaw_mjs", "openclaw_entry") if k not in detected]
        result["missing"] = missing
        result["error"] = (
            "Could not auto-detect: " + ", ".join(missing) + ". "
            "Set WATCHDOG_B_NODE_BIN / WATCHDOG_B_OPENCLAW_MJS / WATCHDOG_B_OPENCLAW_ENTRY explicitly if this host uses a non-standard install path."
        )
    return result
def main() -> int:
    """CLI entry point for the runtime probe.

    Without flags the full detection result is printed as JSON (indented when
    ``--pretty`` is given). With ``--shell`` three ``KEY=value`` lines are
    printed instead (no ``export`` keyword, values not quoted); if detection
    failed, the error text is printed in their place.

    Returns:
        0 when node, openclaw_mjs and openclaw_entry were all detected, else 1.
    """
    cli = argparse.ArgumentParser(description="Detect node/openclaw runtime paths for watchdog-b scripts")
    cli.add_argument("--shell", action="store_true", help="print shell export lines")
    cli.add_argument("--pretty", action="store_true", help="pretty-print json")
    opts = cli.parse_args()

    outcome = detect_runtime()

    if not opts.shell:
        print(json.dumps(outcome, ensure_ascii=False, indent=2 if opts.pretty else None))
        return 0 if outcome["ok"] else 1

    if not outcome["ok"]:
        print(outcome["error"], flush=True)
        return 1

    paths = outcome["detected"]
    shell_vars = (
        ("WATCHDOG_B_NODE_BIN", "node"),
        ("WATCHDOG_B_OPENCLAW_MJS", "openclaw_mjs"),
        ("WATCHDOG_B_OPENCLAW_ENTRY", "openclaw_entry"),
    )
    for var_name, key in shell_vars:
        print(f"{var_name}={paths[key]}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""Minimal owner-report consumer.
Reads a pending owner report markdown file with simple front-matter-like key/value
lines and emits normalized JSON to stdout.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
OWNER_REPORT_ROOT = Path.home() / ".clawteam" / "owner-reports"
PENDING_DIR = OWNER_REPORT_ROOT / "pending"
def parse_pending_report(path: Path) -> dict:
    """Read a pending report file and normalise it into a JSON-ready dict.

    The file is treated as a flat list of ``key: value`` lines. Lines without
    a colon (including blank ones) are ignored, only the first colon splits,
    and a repeated key keeps its last value.

    Args:
        path: Location of the pending report file (read as UTF-8).

    Returns:
        A dict with ``ok``, ``path``, ``filename``, the well-known report
        fields (``report_id`` falls back to the file stem, ``report_kind`` to
        ``"checkpoint"``), the unquoted ``message`` and all parsed pairs under
        ``raw``.
    """
    text = path.read_text(encoding="utf-8")
    fields: dict[str, str] = {}
    for raw_line in text.splitlines():
        name, colon, rest = raw_line.strip().partition(":")
        if not colon:
            # No separator on this line (also covers empty lines).
            continue
        fields[name.strip()] = rest.strip()

    report_id = fields.get("report_id") or path.stem
    report_kind = fields.get("report_kind") or "checkpoint"
    message = _unquote(fields.get("message", ""))
    return {
        "ok": True,
        "path": str(path),
        "filename": path.name,
        "report_id": report_id,
        "team": fields.get("team"),
        "source": fields.get("source"),
        "report_kind": report_kind,
        "created_at": fields.get("created_at"),
        "message": message,
        "raw": fields,
    }
def _unquote(value: str) -> str:
value = value.strip()
if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
return value[1:-1]
return value
def resolve_input(name_or_path: str) -> Path:
    """Resolve a CLI argument to an existing pending report file.

    Candidates are tried in this order:
      1. ``name_or_path`` as a path (``~`` expanded), relative to the CWD;
      2. ``PENDING_DIR / name_or_path``;
      3. the same name with ``.md`` appended inside ``PENDING_DIR``.

    Args:
        name_or_path: A path, a file name in the pending directory, or a bare
            report id.

    Returns:
        The first candidate that exists.

    Raises:
        FileNotFoundError: If none of the candidates exists.
    """
    direct = Path(name_or_path).expanduser()
    if direct.exists():
        return direct
    queued = PENDING_DIR / name_or_path
    if queued.exists():
        return queued
    # Append ".md" to the full name instead of gating on an empty Path.suffix:
    # report ids may contain dots (e.g. "team-task-1.2-checkpoint"), which
    # Path.suffix reports as an extension and which used to skip this lookup.
    if queued.suffix != ".md":
        md_candidate = queued.with_name(queued.name + ".md")
        if md_candidate.exists():
            return md_candidate
    raise FileNotFoundError(f"pending report not found: {name_or_path}")
def main() -> int:
    """CLI entry point: resolve one pending report and print it as indented JSON.

    Returns:
        Always 0; an unresolved report surfaces as the ``FileNotFoundError``
        raised by ``resolve_input``.
    """
    parser = argparse.ArgumentParser(description="Emit JSON for a pending owner report")
    parser.add_argument("report", help="Pending report path, filename, or report_id")
    requested = parser.parse_args().report

    report = parse_pending_report(resolve_input(requested))
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""Minimal owner-report driver.
Consumes one pending owner report, calls an external send command, and only moves
it to sent/ after the send command succeeds.
This is a deliberately small manual driver for debugging the owner-report chain.
It does not watch directories, retry, or send anything by itself.
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
from pathlib import Path
from owner_report_consumer import OWNER_REPORT_ROOT, PENDING_DIR, parse_pending_report, resolve_input
SENT_DIR = OWNER_REPORT_ROOT / "sent"
def _build_send_env(payload: dict) -> dict[str, str]:
env = os.environ.copy()
env.update(
{
"OWNER_REPORT_JSON": json.dumps(payload, ensure_ascii=False),
"OWNER_REPORT_ID": str(payload.get("report_id") or ""),
"OWNER_REPORT_TEAM": str(payload.get("team") or ""),
"OWNER_REPORT_SOURCE": str(payload.get("source") or ""),
"OWNER_REPORT_KIND": str(payload.get("report_kind") or "checkpoint"),
"OWNER_REPORT_CREATED_AT": str(payload.get("created_at") or ""),
"OWNER_REPORT_MESSAGE": str(payload.get("message") or ""),
"OWNER_REPORT_PATH": str(payload.get("path") or ""),
}
)
return env
def _sent_path(src: Path) -> Path:
SENT_DIR.mkdir(parents=True, exist_ok=True)
return SENT_DIR / src.name
def _finalize_successful_send(src: Path) -> dict[str, object]:
dest = _sent_path(src)
if src.exists():
src.rename(dest)
return {"moved": True, "already_archived": False, "final_path": str(dest)}
if dest.exists():
return {"moved": False, "already_archived": True, "final_path": str(dest)}
raise FileNotFoundError(
f"successful send completed but pending report disappeared before archiving: pending={src} sent={dest}"
)
def main() -> int:
    """CLI entry point: send one pending report through an external command.

    The command comes from ``--send-cmd`` or ``OWNER_REPORT_SEND_CMD`` and is
    run via ``bash -lc`` with the report exposed as ``OWNER_REPORT_*``
    environment variables. The report is moved to the sent directory only when
    the command exits with status 0. With ``--dry-run`` nothing is executed or
    moved; a preview is printed instead.

    Returns:
        0 on success or dry run, otherwise the send command's exit code.

    Raises:
        SystemExit: If no send command is configured and this is not a dry run.
    """
    cli = argparse.ArgumentParser(description="Send one pending owner report via external command")
    cli.add_argument("report", help="Pending report path, filename, or report_id")
    cli.add_argument(
        "--send-cmd",
        help="Shell command used to send the report. Can also come from OWNER_REPORT_SEND_CMD.",
    )
    cli.add_argument("--dry-run", action="store_true", help="Print what would be sent and do not move files")
    opts = cli.parse_args()

    pending = resolve_input(opts.report)
    report = parse_pending_report(pending)
    command = opts.send_cmd or os.environ.get("OWNER_REPORT_SEND_CMD")

    if opts.dry_run:
        preview = {
            "ok": True,
            "dry_run": True,
            "action": "would_send",
            "pending_path": str(pending),
            "sent_path": str(_sent_path(pending)),
            "payload": report,
            "send_cmd": command,
        }
        print(json.dumps(preview, ensure_ascii=False, indent=2))
        return 0

    if not command:
        raise SystemExit("missing send command: use --send-cmd or OWNER_REPORT_SEND_CMD")

    completed = subprocess.run(
        ["bash", "-lc", command],
        text=True,
        capture_output=True,
        env=_build_send_env(report),
    )
    exit_code = completed.returncode
    summary = {
        "ok": exit_code == 0,
        "dry_run": False,
        "pending_path": str(pending),
        "sent_path": str(_sent_path(pending)),
        "send_cmd": command,
        "exit_code": exit_code,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "payload": report,
    }
    if exit_code == 0:
        # Archive only after a confirmed successful send.
        summary.update(_finalize_successful_send(pending))
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return exit_code


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env python3
"""Minimal owner-report producer for ClawTeam-style worker checkpoints.
Writes ~/.clawteam/owner-reports/pending/<report_id>.md using explicit checkpoint
fields and a human-readable message suitable for direct Telegram delivery.
This intentionally stays tiny:
- no daemon
- no event bus
- no parser for arbitrary logs
- just explicit fields in -> pending markdown out
"""
from __future__ import annotations
import argparse
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from owner_report_consumer import OWNER_REPORT_ROOT
PENDING_DIR = OWNER_REPORT_ROOT / "pending"
def _slug(value: str) -> str:
slug = re.sub(r"[^a-zA-Z0-9._-]+", "-", value.strip()).strip("-._")
return slug or "report"
def _now_iso() -> str:
return datetime.now().astimezone().isoformat(timespec="seconds")
def build_message(*, team: str, worker: str, task_id: str, progress: str, done: str, next_step: str, status: str, source: str | None, report_kind: str) -> str:
headline = f"🔔 [{team}] {worker}"
if report_kind == "leader-final":
headline = f"✅ [{team}] final"
lines = [
headline,
done,
]
if next_step.strip():
lines.append(f"{next_step}")
tech = [
f"task={task_id}",
f"status={status}",
f"progress={progress}",
]
if source:
tech.append(f"source={source}")
lines.append(" | ".join(tech))
return "\n".join(lines)
def build_report_body(*, report_id: str, team: str, worker: str, task_id: str, progress: str, done: str, next_step: str, status: str, source: str | None, created_at: str, message: str, report_kind: str) -> str:
fields: list[tuple[str, str | None]] = [
("report_id", report_id),
("team", team),
("worker", worker),
("task_id", task_id),
("progress", progress),
("done", done),
("next", next_step),
("status", status),
("report_kind", report_kind),
("source", source),
("created_at", created_at),
("message", json.dumps(message, ensure_ascii=False)),
]
return "\n".join(f"{k}: {v}" for k, v in fields if v is not None) + "\n"
def main() -> int:
    """CLI entry point: create one pending owner report from explicit fields.

    The report id defaults to the slugged team, worker, task id and report
    kind joined with ``-``; ``--report-id`` overrides it verbatim. The result
    summary is printed as JSON. With ``--dry-run`` the rendered body is
    included in that summary and nothing is written; otherwise the body is
    written to ``PENDING_DIR/<report_id>.md``.

    Returns:
        Always 0.
    """
    ap = argparse.ArgumentParser(description="Create one pending owner report from explicit checkpoint fields")
    for required_flag in ("--team", "--worker", "--task-id", "--progress", "--done"):
        ap.add_argument(required_flag, required=True)
    ap.add_argument("--next", dest="next_step", required=True)
    ap.add_argument("--status", required=True)
    ap.add_argument("--source")
    ap.add_argument("--report-kind", choices=["checkpoint", "leader-final"], default="checkpoint")
    ap.add_argument("--report-id", help="Optional explicit report_id / filename stem")
    # The default timestamp is taken once, when the parser is built.
    ap.add_argument("--created-at", default=_now_iso())
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    id_parts = (args.team, args.worker, args.task_id, args.report_kind)
    report_id = args.report_id or "-".join(_slug(part) for part in id_parts)

    # Fields shared by the message and the stored report body.
    common = {
        "team": args.team,
        "worker": args.worker,
        "task_id": args.task_id,
        "progress": args.progress,
        "done": args.done,
        "next_step": args.next_step,
        "status": args.status,
        "source": args.source,
        "report_kind": args.report_kind,
    }
    message = build_message(**common)
    body = build_report_body(
        report_id=report_id,
        created_at=args.created_at,
        message=message,
        **common,
    )

    path = PENDING_DIR / f"{report_id}.md"
    result = {
        "ok": True,
        "report_id": report_id,
        "path": str(path),
        "message": message,
        "dry_run": args.dry_run,
    }
    if args.dry_run:
        result["body"] = body
    else:
        PENDING_DIR.mkdir(parents=True, exist_ok=True)
        path.write_text(body, encoding="utf-8")
    print(json.dumps(result, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

141
scripts/run_watchdog_b.sh Executable file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env bash
set -euo pipefail
# Watchdog B v2 dispatcher/runner.
# Unified entrypoint for timer/service/manual runs.
#
# Flow:
# 1) Call check_openclaw_state.sh to get one of: running | stalled | idle
# 2) Emit a human-readable action template for the detected state
# 3) Invoke the notification layer (dry-run/manual by default, configurable)
# 4) Persist rendered output for local verification / future integrations
#
# Notification behavior is intentionally conservative:
# - running: defaults to a manual/queue-ready owner report path
# - stalled/idle: nudge main agent first, then optionally escalate to owner report
# - outbound owner messaging reuses the existing owner-reporting-system queue
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"

# Optional policy file. It is sourced under `set -a`, so every variable it
# assigns is exported to the checker and notifier started below.
WATCHDOG_B_CONFIG_FILE_DEFAULT="$HOME/.config/openclaw/watchdog-b.env"
WATCHDOG_B_CONFIG_FILE="${WATCHDOG_B_CONFIG_FILE:-$WATCHDOG_B_CONFIG_FILE_DEFAULT}"
if [[ -f "$WATCHDOG_B_CONFIG_FILE" ]]; then
  set -a
  # shellcheck disable=SC1090
  . "$WATCHDOG_B_CONFIG_FILE"
  set +a
fi

# Resolved settings; each WATCHDOG_B_* override wins over its default.
WORKSPACE_DEFAULT="$HOME/.openclaw/workspace"
WORKSPACE_DIR="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
CHECKER="${WATCHDOG_B_CHECKER:-$SCRIPT_DIR/check_openclaw_state.sh}"
ARTIFACT_DIR="${WATCHDOG_B_ARTIFACT_DIR:-$WORKSPACE_DIR/state/watchdog-b}"
TIMESTAMP="$(date '+%Y-%m-%dT%H:%M:%S%z')"
HOSTNAME_VALUE="$(hostname 2>/dev/null || echo unknown-host)"
NOTIFIER="${WATCHDOG_B_NOTIFIER:-$SCRIPT_DIR/notify_watchdog_b.py}"
# Dry-run is the default; only an explicit value other than 1 goes live.
NOTIFY_DRY_RUN="${WATCHDOG_B_NOTIFY_DRY_RUN:-1}"

mkdir -p "$ARTIFACT_DIR"

if [[ ! -x "$CHECKER" ]]; then
  echo "watchdog-b error: checker not executable: $CHECKER" >&2
  exit 1
fi

# The checker path is validated above as one executable file, so invoke it
# quoted: an unquoted expansion would word-split (and glob) an install path
# that contains spaces.
STATE="$("$CHECKER")"
# Print the "running" status block: machine-readable WATCHDOG_B_* lines that
# wrap a human-readable progress-report template.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_running() {
  printf '%s\n' \
    "WATCHDOG_B_STATE=running" \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    "WATCHDOG_B_ACTION=progress-template" \
    "WATCHDOG_B_TEMPLATE_BEGIN" \
    "[watchdog-b][running] OpenClaw main runtime appears active." \
    "Suggested future progress-report message template:" \
    "- Status: still running" \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    "- Summary: main runtime is alive and log activity is fresh." \
    "- Next step: if desired, attach latest task/progress snapshot before sending." \
    "WATCHDOG_B_TEMPLATE_END" \
    "WATCHDOG_B_NEXT_HOOK=progress_report_stub"
}
# Print the "stalled" status block: machine-readable WATCHDOG_B_* lines that
# wrap a human-readable nudge/escalation template.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_stalled() {
  printf '%s\n' \
    "WATCHDOG_B_STATE=stalled" \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    "WATCHDOG_B_ACTION=nudge-template" \
    "WATCHDOG_B_TEMPLATE_BEGIN" \
    "[watchdog-b][stalled] OpenClaw main runtime looks alive but may be stuck." \
    "Suggested future nudge/escalation template:" \
    "- Audience: main agent and/or Eric" \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    "- Observation: process is alive, but activity log appears stale beyond threshold." \
    "- Suggested ask: please confirm current task state, unblock reason, or whether intervention is needed." \
    "WATCHDOG_B_TEMPLATE_END" \
    "WATCHDOG_B_NEXT_HOOK=stalled_nudge_stub"
}
# Print the "idle" status block: machine-readable WATCHDOG_B_* lines that
# wrap a human-readable reminder template.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_idle() {
  printf '%s\n' \
    "WATCHDOG_B_STATE=idle" \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    "WATCHDOG_B_ACTION=idle-template" \
    "WATCHDOG_B_TEMPLATE_BEGIN" \
    "[watchdog-b][idle] OpenClaw main runtime does not appear to be actively running." \
    "Suggested future reminder template:" \
    "- Audience: main agent and/or Eric" \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    "- Observation: no live runtime detected from pid/log heuristic." \
    "- Suggested ask: confirm whether the runtime should be started, ignored, or left idle." \
    "WATCHDOG_B_TEMPLATE_END" \
    "WATCHDOG_B_NEXT_HOOK=idle_reminder_stub"
}
# Render the template for the reported state; any other value is treated as a
# checker error and aborts with exit code 2 before anything is persisted.
case "$STATE" in
  running | stalled | idle)
    OUTPUT="$("emit_${STATE}")"
    ;;
  *)
    echo "watchdog-b error: unexpected state from checker: $STATE" >&2
    exit 2
    ;;
esac
printf '%s\n' "$OUTPUT"

# Notification layer. A missing or failing notifier only produces a warning,
# so the artifacts below are still written.
NOTIFY_OUTPUT=""
if [[ ! -x "$NOTIFIER" ]]; then
  echo "watchdog-b warning: notifier not executable: $NOTIFIER" >&2
else
  NOTIFY_CMD=("$NOTIFIER" --state "$STATE" --timestamp "$TIMESTAMP")
  [[ "$NOTIFY_DRY_RUN" != "1" ]] || NOTIFY_CMD+=(--dry-run)
  notify_rc=0
  # stderr is folded into the captured output so it reaches the artifact file.
  NOTIFY_OUTPUT="$(WATCHDOG_B_ARTIFACT_DIR="$ARTIFACT_DIR" "${NOTIFY_CMD[@]}" 2>&1)" || notify_rc=$?
  printf '%s\n' "$NOTIFY_OUTPUT"
  if ((notify_rc != 0)); then
    echo "watchdog-b warning: notifier returned non-zero for state=$STATE" >&2
  fi
fi

# Persist this run: latest rendered output, latest notifier output, an
# append-only history line, and the latest state.
printf '%s\n' "$OUTPUT" > "$ARTIFACT_DIR/last-output.txt"
printf '%s\n' "$NOTIFY_OUTPUT" > "$ARTIFACT_DIR/last-notify-output.txt"
printf '%s\t%s\n' "$TIMESTAMP" "$STATE" >> "$ARTIFACT_DIR/history.tsv"
printf '%s\n' "$STATE" > "$ARTIFACT_DIR/last-state.txt"

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env bash
set -euo pipefail
# Live end-to-end check for watchdog-b.
#
# Builds a fixture (pid file pointing at a background `sleep`, plus a freshly
# touched log file), runs the real run_watchdog_b.sh with dry-run disabled and
# enqueue-and-drain mode, then verifies that the report id recorded in
# notify-state.json has a matching file in ~/.clawteam/owner-reports/sent/.
# Everything printed is also appended to <run dir>/verify.log.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
WORKSPACE="$(cd -- "$SCRIPT_DIR/../.." && pwd)"
ARTIFACT_ROOT="${WATCHDOG_B_VERIFY_ROOT:-$WORKSPACE/state/watchdog-b-verify-e2e}"
RUN_ID="${RUN_ID:-$(date +%Y%m%dT%H%M%S)}"
RUN_DIR="$ARTIFACT_ROOT/$RUN_ID"
FIXTURE_DIR="$RUN_DIR/fixture"
LOG="$RUN_DIR/verify.log"
STATE_DIR="$RUN_DIR/state"
QUEUE_SNAPSHOT="$RUN_DIR/queue-before.txt"
QUEUE_AFTER="$RUN_DIR/queue-after.txt"
OWNER_REPORTS_DIR="$HOME/.clawteam/owner-reports"

# Write a sorted listing of the owner-report queue files to $1.
# Best effort: a missing queue directory must not abort the run.
snapshot_queue() {
  find "$OWNER_REPORTS_DIR" -maxdepth 2 -type f | sort > "$1" || true
}

mkdir -p "$FIXTURE_DIR/host-runtime" "$FIXTURE_DIR/logs" "$STATE_DIR" "$RUN_DIR"
exec > >(tee -a "$LOG") 2>&1

echo "[verify] run_id=$RUN_ID"
echo "[verify] workspace=$WORKSPACE"
date -Iseconds

echo "[verify] snapshot owner-report queue before"
snapshot_queue "$QUEUE_SNAPSHOT"

# Stand-in process for the pid file; it is killed again when the script exits.
sleep 180 &
FAKE_PID=$!
trap 'kill "$FAKE_PID" 2>/dev/null || true' EXIT
printf '%s\n' "$FAKE_PID" > "$FIXTURE_DIR/host-runtime/openclaw.pid"
touch "$FIXTURE_DIR/logs/openclaw.log"

echo "[verify] run watchdog-b direct E2E (enqueue + direct delivery)"
OPENCLAW_PID_FILE="$FIXTURE_DIR/host-runtime/openclaw.pid" \
  OPENCLAW_LOG_FILE="$FIXTURE_DIR/logs/openclaw.log" \
  STALL_AFTER_SECONDS=1200 \
  WATCHDOG_B_ARTIFACT_DIR="$STATE_DIR" \
  WATCHDOG_B_NOTIFY_DRY_RUN=0 \
  WATCHDOG_B_RUNNING_REPORT_MODE=enqueue-and-drain \
  WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS=0 \
  "$WORKSPACE/scripts/watchdog-b/run_watchdog_b.sh" | tee "$RUN_DIR/run-output.txt"

echo "[verify] snapshot owner-report queue after"
snapshot_queue "$QUEUE_AFTER"

echo "[verify] summarize"
# Pull the enqueued report id out of the notifier's state file.
REPORT_ID="$(python3 -c '
import json
import sys

with open(sys.argv[1], "r", encoding="utf-8") as handle:
    state = json.load(handle)
print(state["events"]["running"]["last_result"]["enqueue"]["report_id"])
' "$STATE_DIR/notify-state.json")"
printf 'REPORT_ID=%s\n' "$REPORT_ID" | tee "$RUN_DIR/result.env"

SENT_PATH="$OWNER_REPORTS_DIR/sent/$REPORT_ID.md"
printf 'SENT_PATH=%s\n' "$SENT_PATH" | tee -a "$RUN_DIR/result.env"

if [[ ! -f "$SENT_PATH" ]]; then
  echo "[verify] ERROR: sent file missing: $SENT_PATH" >&2
  exit 1
fi
echo "[verify] sent file found"
head -n 120 -- "$SENT_PATH" | tee "$RUN_DIR/sent-head.txt"
echo "[verify] done"

View File

@@ -0,0 +1,40 @@
# Single source of truth for watchdog-b owner-facing policy.
# Preferred location: ~/.config/openclaw/watchdog-b.env
# Can also be loaded manually by:
# WATCHDOG_B_CONFIG_FILE=... ./scripts/watchdog-b/run_watchdog_b.sh
# WATCHDOG_B_CONFIG_FILE=... ./scripts/watchdog-b/notify_watchdog_b.py --state running
#
# run_watchdog_b.sh sources this file with `set -a`, so every assignment below
# is exported to the processes it starts. Keep entries as plain KEY=value shell
# assignments (no spaces around `=`); a value containing whitespace must be
# quoted.
# --- delivery / runtime policy ---
# 1 makes run_watchdog_b.sh pass --dry-run to the notifier (this is also the
# runner's default when the variable is unset); any other value omits the flag.
WATCHDOG_B_NOTIFY_DRY_RUN=0
WATCHDOG_B_RUNNING_REPORT_MODE=enqueue-and-drain
# NOTE(review): presumably the minimum gap between two "running" reports, in
# seconds — confirm against notify_watchdog_b.py.
WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS=3600
WATCHDOG_B_OWNER_DELIVERY_MODE=direct-discord
WATCHDOG_B_OWNER_REPORT_CHANNEL=discord
# Placeholder: replace REPLACE_ME with the real destination before going live.
WATCHDOG_B_OWNER_REPORT_TARGET=channel:REPLACE_ME
# --- non-running escalation policy ---
# Set this only if the host actually has a valid OpenClaw agent id to nudge.
# If left unset, stalled/idle paths skip main-agent nudge and can still escalate owner-facing reports.
# WATCHDOG_B_MAIN_AGENT_ID=main
# WATCHDOG_B_STALLED_OWNER_MODE=escalate
# WATCHDOG_B_IDLE_OWNER_MODE=escalate
# WATCHDOG_B_STALLED_OWNER_ESCALATION_AFTER=2
# WATCHDOG_B_IDLE_OWNER_ESCALATION_AFTER=2
# WATCHDOG_B_STALLED_NUDGE_MIN_INTERVAL_SECONDS=900
# WATCHDOG_B_IDLE_NUDGE_MIN_INTERVAL_SECONDS=1800
# --- owner-facing message style ---
# One emoji and one short summary per watchdog state (running / stalled / idle).
WATCHDOG_B_RUNNING_EMOJI=✅
WATCHDOG_B_RUNNING_SUMMARY=主程序仍在運行
WATCHDOG_B_STALLED_EMOJI=⚠️
WATCHDOG_B_STALLED_SUMMARY=主程序疑似卡住
WATCHDOG_B_IDLE_EMOJI=🛑
WATCHDOG_B_IDLE_SUMMARY=主程序目前未運行
# Optional overrides for the compact technical line.
# WATCHDOG_B_RUNNING_PROGRESS_LABEL=running
# WATCHDOG_B_STALLED_PROGRESS_LABEL=stalled
# WATCHDOG_B_IDLE_PROGRESS_LABEL=idle
# WATCHDOG_B_RUNNING_STATUS=normal
# WATCHDOG_B_STALLED_STATUS=needs-attention
# WATCHDOG_B_IDLE_STATUS=needs-attention