Initial import of watchdog-discord-route skill
This commit is contained in:
BIN
scripts/__pycache__/notify_watchdog_b.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/notify_watchdog_b.cpython-312.pyc
Normal file
Binary file not shown.
BIN
scripts/__pycache__/openclaw_runtime_probe.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/openclaw_runtime_probe.cpython-312.pyc
Normal file
Binary file not shown.
BIN
scripts/__pycache__/owner_report_consumer.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/owner_report_consumer.cpython-312.pyc
Normal file
Binary file not shown.
BIN
scripts/__pycache__/owner_report_driver.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/owner_report_driver.cpython-312.pyc
Normal file
Binary file not shown.
BIN
scripts/__pycache__/owner_report_producer.cpython-312.pyc
Normal file
BIN
scripts/__pycache__/owner_report_producer.cpython-312.pyc
Normal file
Binary file not shown.
174
scripts/bootstrap_watchdog_bundle.sh
Executable file
174
scripts/bootstrap_watchdog_bundle.sh
Executable file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env bash
# Pre-flight checks for the watchdog-discord-route bundle.
# Each path below falls back to a default and can be overridden through the
# WATCHDOG_B_* environment variable named in its expansion.
set -euo pipefail

# Locations derived from where this script lives.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"

# Per-user locations.
HOME_DIR="${HOME:?HOME is required}"
WORKSPACE_DEFAULT="$HOME_DIR/.openclaw/workspace"
WORKSPACE="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
LIVE_SCRIPT_DIR="${WATCHDOG_B_LIVE_SCRIPT_DIR:-$WORKSPACE/scripts/watchdog-b}"
SYSTEMD_USER_DIR="${WATCHDOG_B_SYSTEMD_USER_DIR:-$HOME_DIR/.config/systemd/user}"
CONFIG_DIR="${WATCHDOG_B_CONFIG_DIR:-$HOME_DIR/.config/openclaw}"
CONFIG_FILE="${WATCHDOG_B_CONFIG_FILE:-$CONFIG_DIR/watchdog-b.env}"

# Runtime discovery inputs; the three runtime paths start empty unless the
# operator exported them.
PROBE_SCRIPT="${WATCHDOG_B_RUNTIME_PROBE:-$SCRIPT_DIR/openclaw_runtime_probe.py}"
NODE_BIN_RAW="${WATCHDOG_B_NODE_BIN:-}"
OPENCLAW_MJS="${WATCHDOG_B_OPENCLAW_MJS:-}"
OPENCLAW_ENTRY="${WATCHDOG_B_OPENCLAW_ENTRY:-}"

# Owner-report helper scripts expected in the live script directory.
OWNER_REPORT_PRODUCER="${WATCHDOG_B_OWNER_PRODUCER:-$LIVE_SCRIPT_DIR/owner_report_producer.py}"
OWNER_REPORT_DRIVER="${WATCHDOG_B_OWNER_DRIVER:-$LIVE_SCRIPT_DIR/owner_report_driver.py}"
OWNER_REPORT_CONSUMER_DEFAULT="$LIVE_SCRIPT_DIR/owner_report_consumer.py"
OWNER_REPORT_CONSUMER="${WATCHDOG_B_OWNER_REPORT_CONSUMER:-$OWNER_REPORT_CONSUMER_DEFAULT}"

# Number of failed checks recorded so far (incremented by fail()).
FAILURES=0

# Print a passing check line on stdout.
pass() {
  printf '[PASS] %s\n' "$*"
}

# Print a warning line on stdout; does not count as a failure.
warn() {
  printf '[WARN] %s\n' "$*"
}

# Print a failing check line on stdout and bump FAILURES.
fail() {
  printf '[FAIL] %s\n' "$*"
  FAILURES=$(( FAILURES + 1 ))
}
|
||||
|
||||
# Record a pass when a path exists (any file type), otherwise a failure.
# Arguments: $1 - path to test, $2 - label used in the report line
check_exists() {
  local target="$1" description="$2"
  if [[ ! -e "$target" ]]; then
    fail "$description missing: $target"
    return
  fi
  pass "$description: $target"
}
|
||||
|
||||
# Check that a program is runnable.
# A value containing a slash is treated as a path and tested with -x;
# anything else is looked up with `command -v`.
# Arguments: $1 - path or command name, $2 - label used in the report line
check_exec_path() {
  local candidate="$1" description="$2"
  local located=""
  case "$candidate" in
    */*)
      located="$candidate"
      if [[ ! -x "$located" ]]; then
        fail "$description not executable: $located"
      else
        pass "$description executable: $located"
      fi
      return
      ;;
  esac
  if ! located="$(command -v "$candidate" 2>/dev/null)"; then
    fail "$description not found on PATH: $candidate"
  else
    pass "$description on PATH: $located"
  fi
}
|
||||
|
||||
# Verify that `systemctl --user` can be used.
# A missing systemctl or a failing `--user --version` is a failure; an
# unreachable user bus (show-environment fails) is only a warning.
check_systemd_user() {
  command -v systemctl >/dev/null 2>&1 || {
    fail "systemctl not found"
    return
  }

  if ! systemctl --user --version >/dev/null 2>&1; then
    fail "systemd --user unavailable"
  else
    pass "systemd --user command available"
  fi

  if ! systemctl --user show-environment >/dev/null 2>&1; then
    warn "systemd --user bus not reachable in current session"
  else
    pass "systemd --user bus reachable"
  fi
}
|
||||
|
||||
# Validate WATCHDOG_B_OWNER_REPORT_TARGET in the env file at $CONFIG_FILE.
# Globals: CONFIG_FILE (read)
# Outcome: warn when the file is absent; fail when the key is missing or still
#          the placeholder; pass for channel:* / user:* values; warn otherwise.
check_env_target() {
  if [[ ! -f "$CONFIG_FILE" ]]; then
    warn "config file not present yet: $CONFIG_FILE"
    return
  fi

  local target=""
  # Keep everything after the FIRST '=' (a plain `-F= print $2` would cut a
  # value that itself contains '='); the last assignment in the file wins.
  target="$(
    awk '/^WATCHDOG_B_OWNER_REPORT_TARGET=/ { sub(/^[^=]*=/, ""); value = $0 } END { print value }' "$CONFIG_FILE" \
      | tr -d '[:space:]' || true
  )"

  # The Python loader that consumes this file strips one pair of matching
  # quotes around a value, so a quoted value must be judged without them.
  case "$target" in
    \"*\")
      target="${target#\"}"
      target="${target%\"}"
      ;;
    \'*\')
      target="${target#\'}"
      target="${target%\'}"
      ;;
  esac

  if [[ -z "$target" ]]; then
    fail "WATCHDOG_B_OWNER_REPORT_TARGET missing in $CONFIG_FILE"
  elif [[ "$target" == "channel:REPLACE_ME" ]]; then
    fail "WATCHDOG_B_OWNER_REPORT_TARGET still placeholder in $CONFIG_FILE"
  elif [[ "$target" == channel:* || "$target" == user:* ]]; then
    pass "WATCHDOG_B_OWNER_REPORT_TARGET looks configured: $target"
  else
    warn "WATCHDOG_B_OWNER_REPORT_TARGET present but format is unusual: $target"
  fi
}
|
||||
|
||||
# Resolve the node / openclaw runtime paths.
# Globals: PROBE_SCRIPT (read); NODE_BIN_RAW, OPENCLAW_MJS, OPENCLAW_ENTRY
#          (read and possibly overwritten)
# When all three paths were supplied explicitly the probe is skipped: the
# failure message below tells operators to do exactly that, so a missing or
# failing probe must not be counted against them. Otherwise the probe script
# is run with --shell and its KEY=VALUE lines are parsed.
probe_runtime() {
  if [[ -n "$NODE_BIN_RAW" && -n "$OPENCLAW_MJS" && -n "$OPENCLAW_ENTRY" ]]; then
    pass "runtime paths provided explicitly; runtime probe skipped"
    return
  fi

  if [[ ! -f "$PROBE_SCRIPT" ]]; then
    fail "runtime probe missing: $PROBE_SCRIPT"
    return
  fi

  local probe_output=""
  if ! probe_output="$(python3 "$PROBE_SCRIPT" --shell 2>/dev/null)"; then
    fail "runtime probe failed; set WATCHDOG_B_NODE_BIN / WATCHDOG_B_OPENCLAW_MJS / WATCHDOG_B_OPENCLAW_ENTRY explicitly"
    return
  fi

  # With IFS='=' the last variable receives the remainder of the line, so a
  # value that itself contains '=' is kept whole.
  local key="" value=""
  while IFS='=' read -r key value; do
    case "$key" in
      WATCHDOG_B_NODE_BIN) NODE_BIN_RAW="$value" ;;
      WATCHDOG_B_OPENCLAW_MJS) OPENCLAW_MJS="$value" ;;
      WATCHDOG_B_OPENCLAW_ENTRY) OPENCLAW_ENTRY="$value" ;;
    esac
  done <<< "$probe_output"

  pass "runtime probe resolved node/openclaw paths"
}
|
||||
|
||||
# Run the runtime probe, then confirm that the openclaw entry file and the
# openclaw mjs file are both set and are regular files.
# Globals: OPENCLAW_ENTRY, OPENCLAW_MJS (read after probe_runtime)
check_message_cli() {
  probe_runtime

  local spec="" name="" location=""
  for spec in "entry:$OPENCLAW_ENTRY" "mjs:$OPENCLAW_MJS"; do
    name="${spec%%:*}"
    location="${spec#*:}"
    if [[ -z "$location" || ! -f "$location" ]]; then
      fail "openclaw $name missing: ${location:-<unset>}"
    else
      pass "openclaw $name present: $location"
    fi
  done
}
|
||||
|
||||
# ---- Report header -------------------------------------------------------
printf '%s\n' \
  "watchdog-discord-route bootstrap" \
  "- skill_dir: $SKILL_DIR" \
  "- workspace: $WORKSPACE" \
  "- live_script_dir: $LIVE_SCRIPT_DIR" \
  "- systemd_user_dir: $SYSTEMD_USER_DIR" \
  "- config_file: $CONFIG_FILE"

# ---- Files shipped next to this script -------------------------------------
printf '\n%s\n' "[bundle]"
while IFS='|' read -r bundled_name bundled_label; do
  check_exists "$SCRIPT_DIR/$bundled_name" "$bundled_label"
done <<'BUNDLE'
check_openclaw_state.sh|bundled checker
run_watchdog_b.sh|bundled runner
notify_watchdog_b.py|bundled notifier
openclaw_runtime_probe.py|bundled runtime probe
openclaw-watchdog-b.service|bundled service
openclaw-watchdog-b.timer|bundled timer
watchdog-b.env.example|bundled env example
BUNDLE

# ---- Installed (live) locations ----------------------------------------------
printf '\n%s\n' "[workspace/live paths]"
check_exists "$WORKSPACE" "workspace"
check_exists "$LIVE_SCRIPT_DIR" "live script dir"
check_exists "$OWNER_REPORT_CONSUMER" "live owner_report_consumer.py"
check_exists "$OWNER_REPORT_PRODUCER" "live owner_report_producer.py"
check_exists "$OWNER_REPORT_DRIVER" "live owner_report_driver.py"

# ---- Runtime tools -------------------------------------------------------------
printf '\n%s\n' "[runtime]"
check_message_cli
if [[ -z "$NODE_BIN_RAW" ]]; then
  fail "node runtime unresolved"
else
  check_exec_path "$NODE_BIN_RAW" "node"
fi
check_exec_path "python3" "python3"
check_systemd_user

# ---- Minimal Discord routing configuration -------------------------------------
printf '\n%s\n' "[discord-route minimal config]"
check_env_target

# ---- Verdict: any recorded failure makes the script exit 1 ----------------------
if (( FAILURES > 0 )); then
  printf '\n'
  fail "bootstrap failed with $FAILURES issue(s)"
  exit 1
fi

printf '\n'
pass "bootstrap checks passed"
|
||||
68
scripts/check_openclaw_state.sh
Executable file
68
scripts/check_openclaw_state.sh
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Watchdog B MVP tri-state checker for OpenClaw main runtime.
# Output (stdout): exactly one token: running | stalled | idle
#
# Heuristic (MVP):
# - If openclaw.pid exists and process is alive => running unless logs are stale.
# - If process alive but log file hasn't changed for STALL_AFTER_SECONDS => stalled.
# - Otherwise => idle.
#
# Environment overrides:
# - OPENCLAW_PID_FILE / OPENCLAW_LOG_FILE: explicit file locations.
# - WATCHDOG_B_WORKSPACE: workspace root used to derive both defaults
#   (falls back to $HOME/.openclaw/workspace, like the other bundle scripts).
# - STALL_AFTER_SECONDS: log age threshold.
#
# Future extension point:
# - Replace/augment log-freshness with real main-agent session/ledger signals.

# Defaults are derived from the invoking user's workspace rather than one
# fixed home directory, so the checker works for any account it is installed
# under. HOME is only required when it is actually needed for a default.
PID_FILE_DEFAULT="${OPENCLAW_PID_FILE:-${WATCHDOG_B_WORKSPACE:-${HOME:?HOME is required}/.openclaw/workspace}/host-runtime/openclaw.pid}"
LOG_FILE_DEFAULT="${OPENCLAW_LOG_FILE:-${WATCHDOG_B_WORKSPACE:-${HOME:?HOME is required}/.openclaw/workspace}/logs/openclaw.log}"

STALL_AFTER_SECONDS="${STALL_AFTER_SECONDS:-1200}" # 20 minutes default
NOW_EPOCH="$(date +%s)"

pid_file="$PID_FILE_DEFAULT"
log_file="$LOG_FILE_DEFAULT"
|
||||
|
||||
# Print a file's modification time as epoch seconds.
# Tries the GNU form (`stat -c %Y`) first and falls back to the BSD form
# (`stat -f %m`) when the GNU form fails.
# Arguments: $1 - path to inspect
get_mtime_epoch() {
  local target="$1"
  if ! stat -c %Y "$target" >/dev/null 2>&1; then
    stat -f %m "$target"
    return
  fi
  stat -c %Y "$target"
}
|
||||
|
||||
# Succeed when $1 is a non-empty, all-digit PID that `kill -0` can signal.
# Returns 1 for an empty or non-numeric argument without calling kill.
proc_alive() {
  local candidate="$1"
  if [[ -z "$candidate" || ! "$candidate" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  kill -0 "$candidate" >/dev/null 2>&1
}
|
||||
|
||||
# No pid file => idle
if [[ ! -f "$pid_file" ]]; then
  echo "idle"
  exit 0
fi

# Strip all whitespace so a trailing newline or CRLF does not spoil the PID.
pid="$(tr -d ' \t\n\r' < "$pid_file" || true)"

# PID file exists but process not alive => idle
if ! proc_alive "$pid"; then
  echo "idle"
  exit 0
fi

# Process alive. If no log file, assume running (can't assess stall)
if [[ ! -f "$log_file" ]]; then
  echo "running"
  exit 0
fi

# The log can disappear (rotation) between the test above and the stat call.
# Treat an unreadable mtime like a missing log instead of letting `set -e`
# abort the script without printing the single state token callers expect.
if ! log_mtime="$(get_mtime_epoch "$log_file" 2>/dev/null)" \
  || [[ ! "$log_mtime" =~ ^[0-9]+$ ]]; then
  echo "running"
  exit 0
fi

age=$(( NOW_EPOCH - log_mtime ))

if (( age > STALL_AFTER_SECONDS )); then
  echo "stalled"
else
  echo "running"
fi
|
||||
136
scripts/install_watchdog_bundle.sh
Executable file
136
scripts/install_watchdog_bundle.sh
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env bash
# Installer for the watchdog-discord-route bundle.
# The defaults below come from the environment (WATCHDOG_B_* variables) and
# may be overridden again by command-line options parsed later in the script.
set -euo pipefail

# Where the bundle lives.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"

# Where things get installed.
HOME_DIR="${HOME:?HOME is required}"
WORKSPACE_DEFAULT="$HOME_DIR/.openclaw/workspace"
WORKSPACE="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
SYSTEMD_USER_DIR="${WATCHDOG_B_SYSTEMD_USER_DIR:-$HOME_DIR/.config/systemd/user}"
CONFIG_DIR="${WATCHDOG_B_CONFIG_DIR:-$HOME_DIR/.config/openclaw}"
LIVE_SCRIPT_DIR="${WATCHDOG_B_LIVE_SCRIPT_DIR:-$WORKSPACE/scripts/watchdog-b}"

# Option flags: 1 = enabled.
INSTALL_ENV_EXAMPLE=0
FORCE=0
|
||||
|
||||
# Print the command-line help on stdout.
# Globals: WORKSPACE_DEFAULT (read)
usage() {
  local program
  program="$(basename "$0")"
  cat <<EOF
Usage: $program [options]

Install bundled watchdog-discord-route assets into live paths.

Options:
--workspace PATH Target workspace (default: $WORKSPACE_DEFAULT)
--systemd-user-dir PATH Target systemd --user unit dir (default: ~/.config/systemd/user)
--config-dir PATH Target config dir (default: ~/.config/openclaw)
--live-script-dir PATH Target live watchdog script dir (default: <workspace>/scripts/watchdog-b)
--install-env-example Also install watchdog-b.env.example to <config-dir>/watchdog-b.env.example
--force Overwrite existing files in live paths
-h, --help Show this help
EOF
}
|
||||
|
||||
# Parse command-line options into the installer's global settings.
# Globals: WORKSPACE, SYSTEMD_USER_DIR, CONFIG_DIR, LIVE_SCRIPT_DIR,
#          INSTALL_ENV_EXAMPLE, FORCE (written)
# Exits: 0 after --help; 2 on an unknown option or a missing option value.
parse_args() {
  # LIVE_SCRIPT_DIR was pre-computed from the default workspace. Remember
  # whether the operator pinned it (environment or flag) so that a bare
  # --workspace can re-derive it, as the usage text promises.
  local live_script_dir_explicit=0
  if [[ -n "${WATCHDOG_B_LIVE_SCRIPT_DIR:-}" ]]; then
    live_script_dir_explicit=1
  fi

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --workspace|--systemd-user-dir|--config-dir|--live-script-dir)
        # Report a missing value clearly instead of tripping `set -u` on $2.
        if [[ $# -lt 2 ]]; then
          echo "missing value for $1" >&2
          usage >&2
          exit 2
        fi
        case "$1" in
          --workspace) WORKSPACE="$2" ;;
          --systemd-user-dir) SYSTEMD_USER_DIR="$2" ;;
          --config-dir) CONFIG_DIR="$2" ;;
          --live-script-dir)
            LIVE_SCRIPT_DIR="$2"
            live_script_dir_explicit=1
            ;;
        esac
        shift 2
        ;;
      --install-env-example)
        INSTALL_ENV_EXAMPLE=1
        shift
        ;;
      --force)
        FORCE=1
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
    esac
  done

  if [[ "$live_script_dir_explicit" != "1" ]]; then
    LIVE_SCRIPT_DIR="$WORKSPACE/scripts/watchdog-b"
  fi
}

parse_args "$@"
|
||||
|
||||
mkdir -p "$LIVE_SCRIPT_DIR" "$SYSTEMD_USER_DIR" "$CONFIG_DIR"
|
||||
|
||||
# Shared implementation behind copy_file / copy_exec.
# Arguments: $1 - permission mode, $2 - source path, $3 - destination path
# Globals:   FORCE (read) - "1" allows overwriting an existing destination
# Outputs:   one "skip existing: ..." or "installed: ..." line on stdout
_install_asset() {
  local mode="$1" src="$2" dest="$3"
  if [[ -e "$dest" && "$FORCE" != "1" ]]; then
    echo "skip existing: $dest"
    return 0
  fi
  # `--` keeps a path that starts with '-' from being read as an option.
  install -m "$mode" -- "$src" "$dest"
  echo "installed: $dest"
}

# Install a regular (non-executable, mode 0644) file.
# Arguments: $1 - source path, $2 - destination path
copy_file() {
  _install_asset 0644 "$1" "$2"
}

# Install an executable (mode 0755) file.
# Arguments: $1 - source path, $2 - destination path
copy_exec() {
  _install_asset 0755 "$1" "$2"
}
|
||||
|
||||
# Escape a string for use as the replacement part of a sed `s#...#...#`
# command: backslash, '&' and the '#' delimiter get a leading backslash.
_sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

# Render the bundled service template into the systemd --user unit dir,
# replacing the template's %h-based default paths with the configured ones.
# Globals: SCRIPT_DIR, SYSTEMD_USER_DIR, WORKSPACE, CONFIG_DIR,
#          LIVE_SCRIPT_DIR, FORCE (read)
# Returns: 0 on success or skip; 1 when rendering fails.
render_service() {
  local src="$SCRIPT_DIR/openclaw-watchdog-b.service"
  local dest="$SYSTEMD_USER_DIR/openclaw-watchdog-b.service"
  if [[ -e "$dest" && "$FORCE" != "1" ]]; then
    echo "skip existing: $dest"
    return 0
  fi

  local workspace_rep="" config_rep="" live_rep="" tmp=""
  workspace_rep="$(_sed_escape_replacement "$WORKSPACE")"
  config_rep="$(_sed_escape_replacement "$CONFIG_DIR")"
  live_rep="$(_sed_escape_replacement "$LIVE_SCRIPT_DIR")"

  # Render into a temp file first: a failed sed must not leave a truncated
  # unit behind that later runs would skip as "existing".
  tmp="$(mktemp "${dest}.XXXXXX")"

  # Order matters: the live-script-dir pattern extends the workspace pattern,
  # so it has to be substituted first or it can never match.
  if ! sed \
    -e "s#%h/\\.openclaw/workspace/scripts/watchdog-b#${live_rep}#g" \
    -e "s#%h/\\.openclaw/workspace#${workspace_rep}#g" \
    -e "s#%h/\\.config/openclaw#${config_rep}#g" \
    "$src" > "$tmp"; then
    rm -f -- "$tmp"
    echo "failed to render: $dest" >&2
    return 1
  fi

  chmod 0644 "$tmp"
  mv -f -- "$tmp" "$dest"
  echo "installed: $dest"
}
|
||||
|
||||
# Executable helpers go into the live script directory.
for asset in \
  check_openclaw_state.sh \
  run_watchdog_b.sh \
  verify_watchdog_b_e2e.sh \
  notify_watchdog_b.py \
  openclaw_runtime_probe.py; do
  copy_exec "$SCRIPT_DIR/$asset" "$LIVE_SCRIPT_DIR/$asset"
done

# The owner-report modules are installed without the executable bit.
for asset in \
  owner_report_consumer.py \
  owner_report_driver.py \
  owner_report_producer.py; do
  copy_file "$SCRIPT_DIR/$asset" "$LIVE_SCRIPT_DIR/$asset"
done

# systemd --user units: the timer is copied verbatim, the service is rendered.
copy_file "$SCRIPT_DIR/openclaw-watchdog-b.timer" "$SYSTEMD_USER_DIR/openclaw-watchdog-b.timer"
render_service

# The env example is only installed on request (--install-env-example).
if [[ "$INSTALL_ENV_EXAMPLE" == "1" ]]; then
  copy_file "$SCRIPT_DIR/watchdog-b.env.example" "$CONFIG_DIR/watchdog-b.env.example"
fi
|
||||
|
||||
# Print the post-install summary and the operator's next steps.
# Globals: SKILL_DIR, WORKSPACE, LIVE_SCRIPT_DIR, SYSTEMD_USER_DIR, CONFIG_DIR,
#          SCRIPT_DIR, INSTALL_ENV_EXAMPLE (read)
print_install_summary() {
  # The example only exists under CONFIG_DIR when --install-env-example was
  # used (now or on an earlier run); otherwise point the cp command at the
  # bundled copy so step 2 works as printed.
  local env_example="$CONFIG_DIR/watchdog-b.env.example"
  if [[ "$INSTALL_ENV_EXAMPLE" != "1" && ! -e "$env_example" ]]; then
    env_example="$SCRIPT_DIR/watchdog-b.env.example"
  fi

  cat <<EOF

Install summary
- skill_dir: $SKILL_DIR
- workspace: $WORKSPACE
- live_script_dir: $LIVE_SCRIPT_DIR
- systemd_user_dir: $SYSTEMD_USER_DIR
- config_dir: $CONFIG_DIR

Operator install order
1. Install bundle files:
./scripts/install_watchdog_bundle.sh --install-env-example
2. Create live env if missing:
mkdir -p "$CONFIG_DIR"
cp "$env_example" "$CONFIG_DIR/watchdog-b.env"
3. Edit live env and set at least:
WATCHDOG_B_OWNER_REPORT_TARGET=channel:YOUR_DISCORD_CHANNEL_ID
4. Run bootstrap:
./scripts/bootstrap_watchdog_bundle.sh
5. Only after bootstrap passes:
systemctl --user daemon-reload
systemctl --user enable --now openclaw-watchdog-b.timer

Notes
- If $CONFIG_DIR/watchdog-b.env does not exist, bootstrap will warn/fail until you create it.
- The env example is intentionally installed as watchdog-b.env.example only; copy it to watchdog-b.env and then edit the copy.
EOF
}

print_install_summary
|
||||
467
scripts/notify_watchdog_b.py
Executable file
467
scripts/notify_watchdog_b.py
Executable file
@@ -0,0 +1,467 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Directory holding this script, and the skill root one level above it.
SCRIPT_DIR = Path(__file__).resolve().parent
SKILL_DIR = SCRIPT_DIR.parent

# Each location below can be overridden through its WATCHDOG_B_* variable.
WORKSPACE = Path(
    os.getenv("WATCHDOG_B_WORKSPACE", str(Path.home().joinpath(".openclaw", "workspace")))
)
CONFIG_FILE = Path(
    os.getenv(
        "WATCHDOG_B_CONFIG_FILE",
        str(Path.home().joinpath(".config", "openclaw", "watchdog-b.env")),
    )
)
LIVE_SCRIPT_DIR = Path(
    os.getenv("WATCHDOG_B_LIVE_SCRIPT_DIR", str(WORKSPACE.joinpath("scripts", "watchdog-b")))
)
|
||||
|
||||
|
||||
def load_env_file(path: Path) -> None:
    """Load KEY=VALUE lines from ``path`` into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys and values are stripped of surrounding whitespace, and one
    pair of matching single or double quotes around a value is removed.
    Variables already present in the environment are left untouched
    (``setdefault``), so the first definition wins. A missing file is a no-op.
    """
    if not path.exists():
        return
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, value = line.split("=", 1)
        key = key.strip()
        if not key:
            continue
        value = value.strip()
        # Require two characters so that a value consisting of a single quote
        # character is kept as-is instead of being emptied.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        os.environ.setdefault(key, value)
|
||||
|
||||
|
||||
# Pull the env file into os.environ before any setting below is read.
load_env_file(CONFIG_FILE)

# State persisted between watchdog runs.
STATE_DIR = Path(os.getenv("WATCHDOG_B_ARTIFACT_DIR", str(WORKSPACE.joinpath("state", "watchdog-b"))))
NOTIFY_STATE_PATH = STATE_DIR.joinpath("notify-state.json")

# Helper scripts and the interpreter used to launch them.
OWNER_PRODUCER = Path(os.getenv("WATCHDOG_B_OWNER_PRODUCER", str(SCRIPT_DIR.joinpath("owner_report_producer.py"))))
OWNER_DRIVER = Path(os.getenv("WATCHDOG_B_OWNER_DRIVER", str(SCRIPT_DIR.joinpath("owner_report_driver.py"))))
PYTHON_BIN = os.getenv("WATCHDOG_B_PYTHON_BIN", sys.executable or "python3")

# Owner-facing delivery settings; the target default is a placeholder.
WATCHDOG_OWNER_REPORT_CHANNEL = os.getenv("WATCHDOG_B_OWNER_REPORT_CHANNEL", "discord")
WATCHDOG_OWNER_REPORT_TARGET = os.getenv("WATCHDOG_B_OWNER_REPORT_TARGET", "channel:REPLACE_ME")
# Empty means "no main agent configured"; the nudge is then skipped.
WATCHDOG_MAIN_AGENT_ID = os.getenv("WATCHDOG_B_MAIN_AGENT_ID", "").strip()

HOSTNAME = os.uname().nodename
UTC = timezone.utc

# Runtime path discovery: probe script location and a per-process cache.
RUNTIME_PROBE = Path(os.getenv("WATCHDOG_B_RUNTIME_PROBE", str(SCRIPT_DIR.joinpath("openclaw_runtime_probe.py"))))
RUNTIME_CACHE: dict[str, Path] | None = None

# Built-in throttling intervals (seconds) and escalation thresholds (counts).
DEFAULTS = dict(
    running_min_interval_seconds=3600,
    stalled_nudge_min_interval_seconds=900,
    idle_nudge_min_interval_seconds=1800,
    stalled_owner_escalation_after=2,
    idle_owner_escalation_after=2,
)
|
||||
|
||||
|
||||
def now_iso() -> str:
    """Return the current local time as a timezone-aware ISO-8601 string with second precision."""
    local_now = datetime.now().astimezone()
    # Dropping the microseconds makes isoformat() omit the fractional part.
    return local_now.replace(microsecond=0).isoformat()
|
||||
|
||||
|
||||
def path_or_none(value: str | None) -> Path | None:
    """Turn a non-empty string into a ``Path`` with ``~`` expanded; return ``None`` for ``None`` or ``""``."""
    return Path(value).expanduser() if value else None
|
||||
|
||||
|
||||
def detect_runtime_paths() -> dict[str, Path]:
    """Resolve the node binary and the two openclaw script paths.

    Resolution order:
      1. Explicit ``WATCHDOG_B_NODE_BIN`` / ``WATCHDOG_B_OPENCLAW_MJS`` /
         ``WATCHDOG_B_OPENCLAW_ENTRY`` when all three are usable.
      2. The runtime probe script (JSON on stdout with a ``detected`` object).
      3. Whatever explicit values exist, with ``node`` looked up on ``PATH``
         when no usable explicit node binary was given.

    The result is cached in ``RUNTIME_CACHE`` for the life of the process.

    Returns:
        Mapping with keys ``node``, ``openclaw_mjs`` and ``openclaw_entry``.

    Raises:
        RuntimeError: when any of the three paths cannot be resolved; the
            message lists the environment variables still missing.
    """
    global RUNTIME_CACHE
    if RUNTIME_CACHE is not None:
        return RUNTIME_CACHE

    def _usable_node(candidate: Path | None) -> bool:
        return candidate is not None and candidate.exists() and os.access(candidate, os.X_OK)

    def _usable_file(candidate: Path | None) -> bool:
        return candidate is not None and candidate.is_file()

    node_bin = path_or_none(os.environ.get("WATCHDOG_B_NODE_BIN"))
    openclaw_mjs = path_or_none(os.environ.get("WATCHDOG_B_OPENCLAW_MJS"))
    openclaw_entry = path_or_none(os.environ.get("WATCHDOG_B_OPENCLAW_ENTRY"))

    if _usable_node(node_bin) and _usable_file(openclaw_mjs) and _usable_file(openclaw_entry):
        RUNTIME_CACHE = {
            "node": node_bin,
            "openclaw_mjs": openclaw_mjs,
            "openclaw_entry": openclaw_entry,
        }
        return RUNTIME_CACHE

    if RUNTIME_PROBE.exists():
        proc = subprocess.run([PYTHON_BIN, str(RUNTIME_PROBE)], text=True, capture_output=True)
        if proc.returncode == 0:
            try:
                detected = json.loads(proc.stdout).get("detected", {})
                probed = {
                    "node": Path(detected["node"]),
                    "openclaw_mjs": Path(detected["openclaw_mjs"]),
                    "openclaw_entry": Path(detected["openclaw_entry"]),
                }
            except (ValueError, KeyError, TypeError, AttributeError):
                # Malformed probe output: fall through to the PATH fallback so
                # callers get the RuntimeError they already handle rather than
                # a JSON/Key error.
                probed = None
            if probed is not None:
                RUNTIME_CACHE = probed
                return RUNTIME_CACHE

    # Last resort: take node from PATH when no usable explicit binary exists.
    if not _usable_node(node_bin):
        node_which = shutil.which("node")
        if node_which:
            node_bin = Path(node_which)

    missing = []
    if not _usable_node(node_bin):
        missing.append("WATCHDOG_B_NODE_BIN")
    if not _usable_file(openclaw_mjs):
        missing.append("WATCHDOG_B_OPENCLAW_MJS")
    if not _usable_file(openclaw_entry):
        missing.append("WATCHDOG_B_OPENCLAW_ENTRY")

    if not missing:
        # Everything resolved after the fallback; previously this path still
        # raised with an empty "Missing:" list.
        RUNTIME_CACHE = {
            "node": node_bin,
            "openclaw_mjs": openclaw_mjs,
            "openclaw_entry": openclaw_entry,
        }
        return RUNTIME_CACHE

    raise RuntimeError(
        "Unable to auto-detect watchdog runtime paths. Missing: " + ", ".join(missing)
    )
|
||||
|
||||
|
||||
def load_state() -> dict[str, Any]:
    """Read the persisted notification state from ``NOTIFY_STATE_PATH``.

    Returns a fresh ``{"events": {}}`` document when the file is absent,
    unreadable, not valid JSON, or does not have the expected shape (a JSON
    object whose ``events`` member, if present, is an object). Resetting is
    deliberate best-effort behavior; a note is written to stderr so that the
    reset does not go unnoticed.
    """
    if NOTIFY_STATE_PATH.exists():
        try:
            loaded = json.loads(NOTIFY_STATE_PATH.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"watchdog-b: ignoring unreadable state file {NOTIFY_STATE_PATH}: {exc}", file=sys.stderr)
        else:
            # Callers use setdefault() on the document and on "events", so
            # both must be dicts.
            if isinstance(loaded, dict) and isinstance(loaded.get("events", {}), dict):
                return loaded
            print(f"watchdog-b: ignoring malformed state file {NOTIFY_STATE_PATH}", file=sys.stderr)
    return {"events": {}}
|
||||
|
||||
|
||||
def save_state(data: dict[str, Any]) -> None:
    """Write ``data`` to ``NOTIFY_STATE_PATH`` as indented UTF-8 JSON, creating ``STATE_DIR`` if needed."""
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    NOTIFY_STATE_PATH.write_text(f"{serialized}\n", encoding="utf-8")
|
||||
|
||||
|
||||
def event_bucket(state: str) -> dict[str, Any]:
    """Load the persisted state and make sure ``events[state]`` exists.

    Note: despite its name this returns the WHOLE state document, not the
    per-state bucket; use ``get_bucket`` to obtain the bucket itself.
    """
    data = load_state()
    # Only the side effect (creating the nested dicts) is needed here.
    data.setdefault("events", {}).setdefault(state, {})
    return data
|
||||
|
||||
|
||||
def get_bucket(data: dict[str, Any], state: str) -> dict[str, Any]:
    """Return the mutable per-state bucket ``data["events"][state]``, creating missing levels in place."""
    per_state = data.setdefault("events", {})
    if state not in per_state:
        per_state[state] = {}
    return per_state[state]
|
||||
|
||||
|
||||
def should_send(bucket: dict[str, Any], min_interval_seconds: int, timestamp: datetime) -> tuple[bool, str]:
    """Decide whether enough time has passed since the bucket's last send.

    Args:
        bucket: per-state record; only ``last_sent_at`` (ISO-8601 string) is read.
        min_interval_seconds: minimum gap between two sends.
        timestamp: the moment being evaluated.

    Returns:
        ``(allowed, reason)`` where reason is one of ``first-send``,
        ``state-corrupt-reset``, ``interval-ok:<n>s`` or
        ``throttled:<n>s<<min>s``.
    """
    last_sent = bucket.get("last_sent_at")
    if not last_sent:
        return True, "first-send"
    try:
        prev = datetime.fromisoformat(last_sent)
    except Exception:
        return True, "state-corrupt-reset"

    # Python refuses to subtract a naive datetime from an aware one (or vice
    # versa). When exactly one side lacks an offset, interpret it as local
    # time - which is what astimezone() does for naive values - so that a
    # hand-supplied timestamp cannot crash the run.
    current = timestamp
    if (current.utcoffset() is None) != (prev.utcoffset() is None):
        if current.utcoffset() is None:
            current = current.astimezone()
        else:
            prev = prev.astimezone()

    elapsed = (current - prev).total_seconds()
    if elapsed >= min_interval_seconds:
        return True, f"interval-ok:{int(elapsed)}s"
    return False, f"throttled:{int(elapsed)}s<{min_interval_seconds}s"
|
||||
|
||||
|
||||
def mark_sent(bucket: dict[str, Any], channel: str, timestamp: str, detail: dict[str, Any] | None = None) -> None:
    """Record a successful send in ``bucket``: time, channel, running count and detail payload."""
    bucket["last_sent_at"], bucket["last_channel"] = timestamp, channel
    bucket["send_count"] = 1 + int(bucket.get("send_count", 0))
    # A missing or empty detail is stored as an empty dict.
    bucket["last_detail"] = detail if detail else {}
|
||||
|
||||
|
||||
def build_owner_message(state: str, timestamp: str, detail: str) -> dict[str, str]:
    """Assemble the owner-report fields for a watchdog state.

    Every field has a built-in default per state and may be overridden via
    ``WATCHDOG_B_<STATE>_{PROGRESS_LABEL,EMOJI,SUMMARY,DETAIL,STATUS,SOURCE}``.
    A non-empty ``detail`` argument takes precedence over both the DETAIL
    override and the built-in detail text.

    Returns:
        Dict with keys ``progress``, ``done``, ``next``, ``status``, ``source``.

    Raises:
        KeyError: if ``state`` is not ``running``, ``stalled`` or ``idle``.
    """
    stamp = f"checked_at={timestamp} host={HOSTNAME}"
    # Columns: emoji, summary, progress label, status, source, detail.
    table = {
        "running": (
            "✅",
            "主程序仍在運行",
            "running",
            "normal",
            "watchdog-b-running",
            stamp,
        ),
        "stalled": (
            "⚠️",
            "主程序疑似卡住",
            "stalled",
            "needs-attention",
            "watchdog-b-stalled-escalation",
            f"{stamp}; stale activity detected while process still looked alive",
        ),
        "idle": (
            "🛑",
            "主程序目前未運行",
            "idle",
            "needs-attention",
            "watchdog-b-idle-escalation",
            f"{stamp}; no active main runtime detected",
        ),
    }
    emoji, summary, progress, status, source, fallback_detail = table[state]
    prefix = f"WATCHDOG_B_{state.upper()}_"

    def setting(suffix: str, default: str) -> str:
        return os.environ.get(prefix + suffix, default)

    headline = f"{setting('EMOJI', emoji)} {setting('SUMMARY', summary)}"
    return {
        "progress": setting("PROGRESS_LABEL", progress),
        "done": headline,
        "next": detail or setting("DETAIL", fallback_detail),
        "status": setting("STATUS", status),
        "source": setting("SOURCE", source),
    }
|
||||
|
||||
|
||||
def enqueue_owner_report(*, state: str, timestamp: str, dry_run: bool, detail: str) -> dict[str, Any]:
    """Run the owner-report producer script to queue a report for ``state``.

    The report id is ``watchdog-b-<state>-<UTC time, second resolution>``.

    Returns:
        Dict describing the invocation: ``kind``, ``ok``, ``command``,
        ``exit_code``, ``stdout``, ``stderr``, ``report_id``, ``dry_run`` and,
        for a successful non-dry run, ``pending_path`` (a path under
        ``~/.clawteam/owner-reports/pending`` built from the report id).
    """
    fields = build_owner_message(state, timestamp, detail)
    stamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%SZ")
    report_id = f"watchdog-b-{state}-{stamp}"

    options = (
        ("--team", "watchdog-b"),
        ("--worker", HOSTNAME),
        ("--task-id", f"openclaw-main-{state}"),
        ("--progress", fields["progress"]),
        ("--done", fields["done"]),
        ("--next", fields["next"]),
        ("--status", fields["status"]),
        ("--source", fields["source"]),
        ("--report-id", report_id),
    )
    cmd = [PYTHON_BIN, str(OWNER_PRODUCER)]
    for flag, value in options:
        cmd.extend((flag, value))
    if dry_run:
        cmd.append("--dry-run")

    completed = subprocess.run(cmd, text=True, capture_output=True)
    succeeded = completed.returncode == 0
    result = {
        "kind": "owner-report-enqueue",
        "ok": succeeded,
        "command": cmd,
        "exit_code": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
        "report_id": report_id,
        "dry_run": dry_run,
    }
    if succeeded and not dry_run:
        pending_dir = Path.home() / ".clawteam" / "owner-reports" / "pending"
        result["pending_path"] = str(pending_dir / f"{report_id}.md")
    return result
|
||||
|
||||
|
||||
def build_owner_send_cmd() -> str:
    """Build the shell command line handed to the owner-report driver.

    The node binary, entry script, channel and target are shell-quoted with
    ``shlex.quote`` so that spaces or quote characters in configured values
    cannot break, or inject into, the command. The message argument is left
    as the literal ``"$OWNER_REPORT_MESSAGE"`` reference.
    NOTE(review): this presumes the driver runs the string through a shell
    that provides OWNER_REPORT_MESSAGE - confirm against owner_report_driver.py.

    Raises:
        RuntimeError: propagated from ``detect_runtime_paths`` when the
            runtime cannot be resolved.
    """
    import shlex  # local import: only needed here

    runtime = detect_runtime_paths()
    argv = [
        str(runtime["node"]),
        str(runtime["openclaw_entry"]),
        "message",
        "send",
        "--channel",
        WATCHDOG_OWNER_REPORT_CHANNEL,
        "--target",
        WATCHDOG_OWNER_REPORT_TARGET,
    ]
    quoted = " ".join(shlex.quote(part) for part in argv)
    return f'{quoted} --message "$OWNER_REPORT_MESSAGE"'
|
||||
|
||||
|
||||
def deliver_owner_report(*, report_id: str, dry_run: bool) -> dict[str, Any]:
    """Deliver a queued owner report by running the owner-report driver.

    Returns:
        Dict with ``kind``, ``ok``, ``dry_run``, ``report_id``,
        ``target_channel`` and ``target``; on a driver run additionally
        ``command``, ``send_cmd``, ``exit_code``, ``stdout`` and ``stderr``;
        when the send command cannot be built, ``error`` instead.
    """
    try:
        send_cmd = build_owner_send_cmd()
    except Exception as exc:
        # Mirror call_main_agent: report an unresolved runtime as a failed
        # result rather than letting the exception abort the run.
        return {
            "kind": "owner-report-direct-delivery",
            "ok": False,
            "dry_run": dry_run,
            "report_id": report_id,
            "error": str(exc),
            "target_channel": WATCHDOG_OWNER_REPORT_CHANNEL,
            "target": WATCHDOG_OWNER_REPORT_TARGET,
        }

    cmd = [PYTHON_BIN, str(OWNER_DRIVER), report_id, "--send-cmd", send_cmd]
    if dry_run:
        cmd.append("--dry-run")
    proc = subprocess.run(cmd, text=True, capture_output=True)
    return {
        "kind": "owner-report-direct-delivery",
        "ok": proc.returncode == 0,
        "command": cmd,
        "send_cmd": send_cmd,
        "exit_code": proc.returncode,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
        "dry_run": dry_run,
        "report_id": report_id,
        "target_channel": WATCHDOG_OWNER_REPORT_CHANNEL,
        "target": WATCHDOG_OWNER_REPORT_TARGET,
    }
|
||||
|
||||
|
||||
def call_main_agent(*, state: str, timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Send a status nudge to the configured main agent through the openclaw CLI.

    The nudge is skipped (``ok`` True, ``skipped`` True) when
    ``WATCHDOG_B_MAIN_AGENT_ID`` is not configured. In dry-run mode the
    command is returned but not executed. ``WATCHDOG_B_MAIN_AGENT_TIMEOUT``
    (default "120") is passed to the CLI; the subprocess is given 10 extra
    seconds before it is killed.

    Returns:
        Dict with ``kind`` "main-agent-nudge", ``ok``, ``dry_run`` and
        ``message``, plus command/output details or ``error`` depending on
        the outcome. Every value is JSON-serializable.
    """
    message = (
        f"[watchdog-b][{state}] {timestamp}\n"
        f"Host: {HOSTNAME}\n"
        f"Please confirm current task state, whether progress is blocked, and whether owner-facing escalation is needed."
    )
    if not WATCHDOG_MAIN_AGENT_ID:
        return {
            "kind": "main-agent-nudge",
            "ok": True,
            "skipped": True,
            "reason": "WATCHDOG_B_MAIN_AGENT_ID not configured",
            "dry_run": dry_run,
            "message": message,
        }
    try:
        runtime = detect_runtime_paths()
    except Exception as exc:
        return {
            "kind": "main-agent-nudge",
            "ok": False,
            "dry_run": dry_run,
            "error": str(exc),
            "message": message,
        }

    timeout_raw = os.environ.get("WATCHDOG_B_MAIN_AGENT_TIMEOUT", "120")
    cmd = [
        str(runtime["node"]),
        str(runtime["openclaw_mjs"]),
        "agent",
        "--agent",
        WATCHDOG_MAIN_AGENT_ID,
        "--message",
        message,
        "--timeout",
        timeout_raw,
    ]
    if dry_run:
        return {"kind": "main-agent-nudge", "ok": True, "dry_run": True, "command": cmd, "message": message}

    try:
        timeout_seconds = int(timeout_raw)
    except ValueError:
        # A mistyped timeout is a configuration error, not a reason to crash.
        return {
            "kind": "main-agent-nudge",
            "ok": False,
            "dry_run": False,
            "command": cmd,
            "error": f"invalid WATCHDOG_B_MAIN_AGENT_TIMEOUT: {timeout_raw!r}",
            "message": message,
        }

    def _as_text(captured: Any) -> Any:
        # Output attached to TimeoutExpired can be bytes even in text mode;
        # bytes would make the caller's json.dumps() fail.
        if isinstance(captured, bytes):
            return captured.decode("utf-8", errors="replace")
        return captured

    try:
        proc = subprocess.run(cmd, text=True, capture_output=True, timeout=timeout_seconds + 10)
    except subprocess.TimeoutExpired as e:
        return {
            "kind": "main-agent-nudge",
            "ok": False,
            "dry_run": False,
            "command": cmd,
            "timeout": True,
            "stdout": _as_text(e.stdout),
            "stderr": _as_text(e.stderr),
            "message": message,
        }
    return {
        "kind": "main-agent-nudge",
        "ok": proc.returncode == 0,
        "dry_run": False,
        "command": cmd,
        "exit_code": proc.returncode,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
        "message": message,
    }
|
||||
|
||||
|
||||
def maybe_running_report(data: dict[str, Any], bucket: dict[str, Any], timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Handle the ``running`` state according to ``WATCHDOG_B_RUNNING_REPORT_MODE``.

    Modes:
      * ``manual`` (default) - nothing is sent; a hint is returned.
      * ``enqueue`` - queue an owner report, subject to the minimum interval.
      * ``enqueue-and-drain`` - queue it and, if that worked, deliver it too.

    ``data`` is accepted for signature parity with the other handler and is
    not used here. ``bucket`` is updated through ``mark_sent`` on success.

    Returns:
        Result dict; ``ok`` is False for an unsupported mode or a failed step.
    """
    mode = os.environ.get("WATCHDOG_B_RUNNING_REPORT_MODE", "manual").lower()
    interval_setting = os.environ.get(
        "WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS",
        str(DEFAULTS["running_min_interval_seconds"]),
    )
    min_interval = int(interval_setting)
    allowed, reason = should_send(bucket, min_interval, datetime.fromisoformat(timestamp))

    result: dict[str, Any] = dict(
        state="running",
        route="owner-report",
        mode=mode,
        allowed=allowed,
        reason=reason,
        dry_run=dry_run,
    )

    if mode not in ("manual", "enqueue", "enqueue-and-drain"):
        result["ok"] = False
        result["error"] = f"unsupported running mode: {mode}"
        return result

    if mode == "manual":
        result["ok"] = True
        result["action"] = "manual-only"
        result["hint"] = "set WATCHDOG_B_RUNNING_REPORT_MODE=enqueue to create a real pending item, or enqueue-and-drain to enqueue and directly deliver it to Discord"
        return result

    if not allowed:
        result["ok"] = True
        result["action"] = "suppressed"
        return result

    enqueue = enqueue_owner_report(
        state="running",
        timestamp=timestamp,
        dry_run=dry_run,
        detail="Main runtime alive and log activity fresh.",
    )
    result["enqueue"] = enqueue
    result["ok"] = enqueue.get("ok", False)
    if enqueue.get("ok"):
        mark_sent(bucket, "owner-report-enqueue", timestamp, {"report_id": enqueue.get("report_id")})

    wants_delivery = mode == "enqueue-and-drain"
    if wants_delivery and enqueue.get("ok"):
        deliver = deliver_owner_report(report_id=enqueue.get("report_id"), dry_run=dry_run)
        result["deliver"] = deliver
        result["ok"] = result["ok"] and deliver.get("ok", False)
        if deliver.get("ok"):
            mark_sent(bucket, "owner-report-direct-delivery", timestamp, {"report_id": enqueue.get("report_id")})
    return result
|
||||
|
||||
|
||||
def maybe_nudge_and_escalate(data: dict[str, Any], bucket: dict[str, Any], *, state: str, timestamp: str, dry_run: bool) -> dict[str, Any]:
    """Handle ``stalled`` / ``idle``: nudge the main agent, then maybe escalate to the owner.

    Settings (``<S>`` is STALLED when ``state == "stalled"``, otherwise IDLE):
      * ``WATCHDOG_B_<S>_NUDGE_MIN_INTERVAL_SECONDS`` - minimum gap between nudges.
      * ``WATCHDOG_B_<S>_OWNER_ESCALATION_AFTER`` - sightings needed before escalating.
      * ``WATCHDOG_B_<S>_OWNER_MODE`` - ``escalate`` (default), ``always`` or ``never``.
      * ``WATCHDOG_B_OWNER_DELIVERY_MODE`` - ``enqueue-only`` (default) or ``direct-discord``.

    ``bucket`` is mutated: ``seen_count`` is incremented on every call, sends
    are recorded through ``mark_sent``, and ``owner_last_sent_at`` records the
    last owner escalation. ``data`` is accepted for signature parity and is
    not used here.

    Returns:
        Result dict; ``ok`` is False when any attempted step failed.
    """
    is_stalled = state == "stalled"
    nudge_min = int(os.environ.get(
        "WATCHDOG_B_STALLED_NUDGE_MIN_INTERVAL_SECONDS" if is_stalled else "WATCHDOG_B_IDLE_NUDGE_MIN_INTERVAL_SECONDS",
        str(DEFAULTS["stalled_nudge_min_interval_seconds"] if is_stalled else DEFAULTS["idle_nudge_min_interval_seconds"]),
    ))
    escalation_after = int(os.environ.get(
        "WATCHDOG_B_STALLED_OWNER_ESCALATION_AFTER" if is_stalled else "WATCHDOG_B_IDLE_OWNER_ESCALATION_AFTER",
        str(DEFAULTS["stalled_owner_escalation_after"] if is_stalled else DEFAULTS["idle_owner_escalation_after"]),
    ))
    owner_mode = os.environ.get(
        "WATCHDOG_B_STALLED_OWNER_MODE" if is_stalled else "WATCHDOG_B_IDLE_OWNER_MODE",
        "escalate",
    ).lower()

    checked_at = datetime.fromisoformat(timestamp)
    bucket["seen_count"] = int(bucket.get("seen_count", 0)) + 1
    allowed, reason = should_send(bucket, nudge_min, checked_at)
    result: dict[str, Any] = {
        "state": state,
        "route": "main-agent-then-owner",
        "allowed": allowed,
        "reason": reason,
        "seen_count": bucket["seen_count"],
        "owner_mode": owner_mode,
        "dry_run": dry_run,
    }

    if allowed:
        nudge = call_main_agent(state=state, timestamp=timestamp, dry_run=dry_run)
        result["main_agent_nudge"] = nudge
        if nudge.get("ok"):
            mark_sent(bucket, "main-agent", timestamp, {"state": state})
        result["ok"] = nudge.get("ok", False)
    else:
        result.update({"ok": True, "action": "nudge-suppressed"})

    # "always" and "escalate" are both gated by the sighting threshold; any
    # other mode (including "never") disables escalation.
    should_escalate = owner_mode in {"always", "escalate"} and bucket["seen_count"] >= escalation_after

    if should_escalate:
        # Owner escalations are throttled on their own timestamp. Gating on
        # the shared last_sent_at would always report "throttled": it was
        # either just set by the nudge above or is what suppressed the nudge.
        owner_gate = {"last_sent_at": bucket.get("owner_last_sent_at")}
        owner_allowed, owner_reason = should_send(owner_gate, nudge_min, checked_at)
        result["owner_escalation_gate"] = {"allowed": owner_allowed, "reason": owner_reason, "threshold": escalation_after}
        if owner_allowed:
            detail = "Main agent was nudged repeatedly; please review whether manual intervention is needed."
            enqueue = enqueue_owner_report(state=state, timestamp=timestamp, dry_run=dry_run, detail=detail)
            result["owner_enqueue"] = enqueue
            result["ok"] = result.get("ok", True) and enqueue.get("ok", False)
            if enqueue.get("ok"):
                mark_sent(bucket, "owner-report-enqueue", timestamp, {"report_id": enqueue.get("report_id"), "state": state})
                bucket["owner_last_sent_at"] = timestamp
                owner_delivery_mode = os.environ.get(
                    "WATCHDOG_B_OWNER_DELIVERY_MODE",
                    "enqueue-only",
                ).lower()
                result["owner_delivery_mode"] = owner_delivery_mode
                if owner_delivery_mode == "direct-discord":
                    deliver = deliver_owner_report(report_id=enqueue.get("report_id"), dry_run=dry_run)
                    result["owner_deliver"] = deliver
                    result["ok"] = result.get("ok", True) and deliver.get("ok", False)
                    if deliver.get("ok"):
                        mark_sent(bucket, "owner-report-direct-delivery", timestamp, {"report_id": enqueue.get("report_id"), "state": state})
    return result
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point of the watchdog-b notification layer.

    Parses ``--state``, ``--timestamp`` and ``--dry-run``, dispatches to
    ``maybe_running_report`` (state ``running``) or
    ``maybe_nudge_and_escalate`` (any other state), records the outcome on
    the state bucket, saves the state, and prints the result as JSON.

    Returns:
        0 when the result's ``ok`` field is truthy, otherwise 1.
    """
    parser = argparse.ArgumentParser(description="Notification layer for watchdog-b")
    parser.add_argument("--state", required=True, choices=["running", "stalled", "idle"])
    parser.add_argument("--timestamp", default=now_iso())
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()

    data = load_state()
    bucket = get_bucket(data, opts.state)

    if opts.state != "running":
        result = maybe_nudge_and_escalate(
            data,
            bucket,
            state=opts.state,
            timestamp=opts.timestamp,
            dry_run=opts.dry_run,
        )
    else:
        result = maybe_running_report(data, bucket, opts.timestamp, opts.dry_run)

    # Remember what was observed and decided so later runs can see it.
    bucket["last_seen_at"] = opts.timestamp
    bucket["last_result"] = result
    save_state(data)

    print(json.dumps(result, ensure_ascii=False, indent=2))
    if result.get("ok", False):
        return 0
    return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
17
scripts/openclaw-watchdog-b.service
Normal file
17
scripts/openclaw-watchdog-b.service
Normal file
@@ -0,0 +1,17 @@
|
||||
# Template systemd --user unit for Watchdog B.
# Install to: ~/.config/systemd/user/openclaw-watchdog-b.service
# Optional env file: ~/.config/openclaw/watchdog-b.env

[Unit]
Description=OpenClaw Watchdog B (verified direct Discord owner-facing path)
# NOTE(review): this is a --user unit; confirm that network-online.target is
# actually available to the user manager on the target hosts, otherwise these
# two ordering/dependency lines may have no effect.
After=network-online.target
Wants=network-online.target

[Service]
# One run per activation: the runner script does its check and exits.
Type=oneshot
# %h expands to the home directory of the user running the unit.
WorkingDirectory=%h/.openclaw/workspace
# Tells the runner which config file to load.
Environment=WATCHDOG_B_CONFIG_FILE=%h/.config/openclaw/watchdog-b.env
# Leading "-": a missing env file is not treated as an error.
EnvironmentFile=-%h/.config/openclaw/watchdog-b.env
ExecStart=%h/.openclaw/workspace/scripts/watchdog-b/run_watchdog_b.sh
StandardOutput=journal
StandardError=journal
|
||||
15
scripts/openclaw-watchdog-b.timer
Normal file
15
scripts/openclaw-watchdog-b.timer
Normal file
@@ -0,0 +1,15 @@
|
||||
# Template systemd --user timer (DO NOT auto-install).
# Runs every 10 minutes.

[Unit]
Description=Run OpenClaw Watchdog B every 10 minutes

[Timer]
# Fire at minutes 0, 10, 20, ... of every hour.
OnCalendar=*:0/10
# Run once after start-up if a scheduled run was missed while the timer was inactive.
Persistent=true
# Optional jitter to avoid synchronized runs
RandomizedDelaySec=30
# Service activated on each elapse.
Unit=openclaw-watchdog-b.service

[Install]
WantedBy=timers.target
|
||||
200
scripts/openclaw_runtime_probe.py
Normal file
200
scripts/openclaw_runtime_probe.py
Normal file
@@ -0,0 +1,200 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
HOME = Path.home()
|
||||
ENV_KEYS = {
|
||||
"node": "WATCHDOG_B_NODE_BIN",
|
||||
"openclaw_mjs": "WATCHDOG_B_OPENCLAW_MJS",
|
||||
"openclaw_entry": "WATCHDOG_B_OPENCLAW_ENTRY",
|
||||
}
|
||||
|
||||
|
||||
def dedupe(items: Iterable[Path]) -> list[Path]:
    """Return ``items`` without duplicates, preserving first-seen order.

    Two paths are duplicates when their ``str()`` forms are equal; the first
    occurrence is the one that is kept.
    """
    first_by_text: dict[str, Path] = {}
    for candidate in items:
        # setdefault keeps the earliest path for each textual form.
        first_by_text.setdefault(str(candidate), candidate)
    return list(first_by_text.values())
|
||||
|
||||
|
||||
def path_candidates() -> tuple[Path | None, list[Path], list[Path]]:
    """Derive runtime candidates from the ``node`` / ``openclaw`` found on PATH.

    Returns:
        A ``(node, roots, entry_candidates)`` tuple where ``node`` is the
        resolved ``node`` executable (or ``None``), ``roots`` are possible
        openclaw package directories, and ``entry_candidates`` are possible
        ``dist/entry.js`` files. The paths are guesses; callers validate them.
    """
    node_path = shutil.which("node")
    openclaw_path = shutil.which("openclaw")
    node_candidate = Path(node_path).resolve() if node_path else None
    roots: list[Path] = []
    entry_candidates: list[Path] = []
    if openclaw_path:
        launcher = Path(openclaw_path)
        op = launcher.resolve()
        roots.extend([
            op.parent.parent / "lib" / "node_modules" / "openclaw",
            op.parent.parent.parent / "lib" / "node_modules" / "openclaw",
            # A global npm install links <prefix>/bin/openclaw into the package
            # directory, so resolve() lands inside the package and the two
            # guesses above miss. Also derive the prefix from the unresolved
            # launcher location ...
            launcher.parent.parent / "lib" / "node_modules" / "openclaw",
            # ... and treat the resolved file's own directory (and its parent,
            # for launchers living in a sub-directory such as dist/) as a
            # possible package root.
            op.parent,
            op.parent.parent,
        ])
        entry_candidates.append(op.parent.parent / "lib" / "node_modules" / "openclaw" / "dist" / "entry.js")
    if node_candidate:
        # openclaw installed under the same prefix as node (e.g. an nvm version dir).
        roots.append(node_candidate.parent.parent / "lib" / "node_modules" / "openclaw")
    return node_candidate, dedupe(roots), dedupe(entry_candidates)
|
||||
|
||||
|
||||
def common_roots() -> list[Path]:
    """List the fixed filesystem locations that are scanned for an openclaw install.

    Covers the nvm version directories (default location and ``$NVM_DIR``),
    the pnpm and npm user-global prefixes, ``/usr/local``, ``/usr`` and the
    volta image directory. Existence is not checked here.
    """
    nvm_home = Path(os.environ.get("NVM_DIR", HOME / ".nvm")).expanduser()
    locations = [
        HOME / ".nvm" / "versions" / "node",
        nvm_home / "versions" / "node",
        HOME / ".local" / "share" / "pnpm" / "global",
        HOME / ".npm-global",
        Path("/usr/local"),
        Path("/usr"),
        HOME / ".volta" / "tools" / "image",
    ]
    return dedupe(locations)
|
||||
|
||||
|
||||
def _node_version_key(version_dir_name: str) -> tuple[tuple[int, ...], str]:
    """Sort key for a ``v<major>.<minor>.<patch>`` directory name (numeric, not lexicographic)."""
    pieces = version_dir_name.lstrip("vV").split(".")
    numbers = tuple(int(p) if p.isascii() and p.isdigit() else -1 for p in pieces)
    # The raw name breaks ties deterministically.
    return numbers, version_dir_name


def scan_openclaw_install_roots() -> list[Path]:
    """Scan the common install locations for openclaw package directories.

    Returns:
        De-duplicated candidate package directories, in scan order. For nvm
        style ``.../versions/node`` roots the newest node version comes first.
    """
    candidates: list[Path] = []
    for root in common_roots():
        if not root.exists():
            continue
        if root.name == "node":
            # Version directories must be compared numerically: a plain string
            # sort would rank "v9.x" above "v22.x".
            version_hits = sorted(
                root.glob("v*/lib/node_modules/openclaw"),
                key=lambda hit: _node_version_key(hit.relative_to(root).parts[0]),
                reverse=True,
            )
            candidates.extend(version_hits)
            continue
        patterns = [
            "lib/node_modules/openclaw",
            "node_modules/openclaw",
            "*/lib/node_modules/openclaw",
            "*/node_modules/openclaw",
        ]
        for pattern in patterns:
            for child in sorted(root.glob(pattern), reverse=True):
                candidates.append(child)
    return dedupe(candidates)
|
||||
|
||||
|
||||
def valid_node(path: Path | None) -> Path | None:
    """Return ``path`` if it is an executable regular file, else ``None``.

    Directories also pass an ``os.X_OK`` check, so the path is additionally
    required to be a file; otherwise a directory given as the node binary
    would be accepted.
    """
    if path is None:
        return None
    if path.is_file() and os.access(path, os.X_OK):
        return path
    return None
|
||||
|
||||
|
||||
def valid_file(path: Path | None) -> Path | None:
    """Return ``path`` unchanged when it is an existing regular file, else ``None``."""
    if path is None or not path.is_file():
        return None
    return path
|
||||
|
||||
|
||||
def detect_runtime() -> dict[str, object]:
    """Locate the node binary and the two openclaw entry files.

    Lookup order per item: explicit environment override (``ENV_KEYS``), then
    candidates derived from PATH, then a scan of common install locations.
    The first valid hit for each item wins.

    Returns:
        A dict with ``ok`` (all three items found), ``detected`` (item -> path),
        ``sources`` (item -> where it was found), ``searched`` (paths that were
        considered), and, when ``ok`` is false, ``missing`` and ``error``.
    """
    result: dict[str, object] = {"ok": False, "detected": {}, "sources": {}, "searched": {}}
    detected: dict[str, str] = {}
    sources: dict[str, str] = {}
    searched: dict[str, list[str]] = {"node": [], "openclaw": []}

    # 1) Explicit environment overrides; used only if they point at something valid.
    env_node = os.environ.get(ENV_KEYS["node"])
    if env_node:
        searched["node"].append(env_node)
        node = valid_node(Path(env_node).expanduser())
        if node:
            detected["node"] = str(node)
            sources["node"] = f"env:{ENV_KEYS['node']}"
    env_mjs = os.environ.get(ENV_KEYS["openclaw_mjs"])
    if env_mjs:
        searched["openclaw"].append(env_mjs)
        mjs = valid_file(Path(env_mjs).expanduser())
        if mjs:
            detected["openclaw_mjs"] = str(mjs)
            sources["openclaw_mjs"] = f"env:{ENV_KEYS['openclaw_mjs']}"
    env_entry = os.environ.get(ENV_KEYS["openclaw_entry"])
    if env_entry:
        searched["openclaw"].append(env_entry)
        entry = valid_file(Path(env_entry).expanduser())
        if entry:
            detected["openclaw_entry"] = str(entry)
            sources["openclaw_entry"] = f"env:{ENV_KEYS['openclaw_entry']}"

    # 2) node from PATH, only when the environment did not already supply one.
    path_node, path_roots, path_entry_candidates = path_candidates()
    if "node" not in detected and path_node:
        searched["node"].append(str(path_node))
        node = valid_node(path_node)
        if node:
            detected["node"] = str(node)
            sources["node"] = "path:node"

    # 3) openclaw: PATH-derived candidates first, then the common-location scan.
    install_roots = dedupe(path_roots + path_entry_candidates + scan_openclaw_install_roots())
    searched["openclaw"].extend(str(p) for p in install_roots)

    def fill_from_root(root: Path, source: str) -> None:
        """Record openclaw files found at ``root`` without overwriting earlier hits.

        ``root`` may be a package directory or a direct path to ``entry.js`` /
        ``openclaw.mjs``; for a file, the package directory is derived from it
        so that the sibling file can be looked up as well.
        """
        if root.is_file():
            candidate_entry = valid_file(root)
            if candidate_entry and candidate_entry.name == "entry.js" and "openclaw_entry" not in detected:
                detected["openclaw_entry"] = str(candidate_entry)
                sources["openclaw_entry"] = source
                # entry.js is looked up as <root>/dist/entry.js, hence two levels up.
                root = candidate_entry.parent.parent
            elif candidate_entry and candidate_entry.name == "openclaw.mjs" and "openclaw_mjs" not in detected:
                detected["openclaw_mjs"] = str(candidate_entry)
                sources["openclaw_mjs"] = source
                root = candidate_entry.parent
            else:
                # Any other file, or an item that is already detected: nothing to do.
                return
        candidate_mjs = valid_file(root / "openclaw.mjs")
        candidate_entry = valid_file(root / "dist" / "entry.js")
        if candidate_mjs and "openclaw_mjs" not in detected:
            detected["openclaw_mjs"] = str(candidate_mjs)
            sources["openclaw_mjs"] = source
        if candidate_entry and "openclaw_entry" not in detected:
            detected["openclaw_entry"] = str(candidate_entry)
            sources["openclaw_entry"] = source

    for root in install_roots:
        source = "path:openclaw" if root in path_roots or root in path_entry_candidates else "scan:common-locations"
        fill_from_root(root, source)
        # Stop as soon as both openclaw files are known.
        if all(k in detected for k in ("openclaw_mjs", "openclaw_entry")):
            break

    result["detected"] = detected
    result["sources"] = sources
    result["searched"] = searched
    result["ok"] = all(k in detected for k in ("node", "openclaw_mjs", "openclaw_entry"))
    if not result["ok"]:
        missing = [k for k in ("node", "openclaw_mjs", "openclaw_entry") if k not in detected]
        result["missing"] = missing
        result["error"] = (
            "Could not auto-detect: " + ", ".join(missing) + ". "
            "Set WATCHDOG_B_NODE_BIN / WATCHDOG_B_OPENCLAW_MJS / WATCHDOG_B_OPENCLAW_ENTRY explicitly if this host uses a non-standard install path."
        )
    return result
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: run the detection and print it as JSON or ``KEY=value`` lines.

    With ``--shell`` the three detected paths are printed one per line as
    ``WATCHDOG_B_*=<path>``; on failure the error text is printed instead.
    Without it the full result dict is printed as JSON (indented when
    ``--pretty`` is given).

    Returns:
        0 when all runtime paths were detected, otherwise 1.
    """
    parser = argparse.ArgumentParser(description="Detect node/openclaw runtime paths for watchdog-b scripts")
    parser.add_argument("--shell", action="store_true", help="print shell export lines")
    parser.add_argument("--pretty", action="store_true", help="pretty-print json")
    opts = parser.parse_args()

    outcome = detect_runtime()

    if not opts.shell:
        indent = 2 if opts.pretty else None
        print(json.dumps(outcome, ensure_ascii=False, indent=indent))
        return 0 if outcome["ok"] else 1

    if not outcome["ok"]:
        print(outcome["error"], flush=True)
        return 1

    found = outcome["detected"]
    print(f'WATCHDOG_B_NODE_BIN={found["node"]}')
    print(f'WATCHDOG_B_OPENCLAW_MJS={found["openclaw_mjs"]}')
    print(f'WATCHDOG_B_OPENCLAW_ENTRY={found["openclaw_entry"]}')
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
75
scripts/owner_report_consumer.py
Normal file
75
scripts/owner_report_consumer.py
Normal file
@@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Minimal owner-report consumer.
|
||||
|
||||
Reads a pending owner report markdown file with simple front-matter-like key/value
|
||||
lines and emits normalized JSON to stdout.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
OWNER_REPORT_ROOT = Path.home() / ".clawteam" / "owner-reports"
|
||||
PENDING_DIR = OWNER_REPORT_ROOT / "pending"
|
||||
|
||||
|
||||
def parse_pending_report(path: Path) -> dict:
    """Read a pending report file and return it as a normalized dict.

    The file is read as UTF-8 and treated as ``key: value`` lines. Lines that
    are blank or contain no colon are ignored, and a repeated key keeps its
    last value.

    Returns:
        A dict with ``ok``, ``path``, ``filename``, ``report_id`` (falls back
        to the file stem), ``team``, ``source``, ``report_kind`` (falls back to
        ``"checkpoint"``), ``created_at``, ``message`` and ``raw`` (all parsed
        key/value pairs).
    """
    fields: dict[str, str] = {}
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        key, colon, value = raw_line.strip().partition(":")
        if not colon:
            # Covers both empty lines and lines without a separator.
            continue
        fields[key.strip()] = value.strip()

    report_id = fields.get("report_id") or path.stem
    report_kind = fields.get("report_kind") or "checkpoint"
    return {
        "ok": True,
        "path": str(path),
        "filename": path.name,
        "report_id": report_id,
        "team": fields.get("team"),
        "source": fields.get("source"),
        "report_kind": report_kind,
        "created_at": fields.get("created_at"),
        "message": _unquote(fields.get("message", "")),
        "raw": fields,
    }
|
||||
|
||||
|
||||
def _unquote(value: str) -> str:
|
||||
value = value.strip()
|
||||
if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
|
||||
return value[1:-1]
|
||||
return value
|
||||
|
||||
|
||||
def resolve_input(name_or_path: str) -> Path:
    """Resolve a path, a filename or a report_id to an existing report file.

    Tried in order: the argument as a path (``~`` expanded), the same name
    inside ``PENDING_DIR``, then that name with ``.md`` appended.

    Raises:
        FileNotFoundError: if none of the candidates exists.
    """
    p = Path(name_or_path).expanduser()
    if p.exists():
        return p
    candidate = PENDING_DIR / name_or_path
    if candidate.exists():
        return candidate
    # Append ".md" instead of testing/replacing the suffix: a report_id may
    # itself contain dots (e.g. "team-v1.2-checkpoint"), which would otherwise
    # be mistaken for a file extension and skip this fallback.
    md_candidate = candidate.parent / (candidate.name + ".md")
    if md_candidate.exists():
        return md_candidate
    raise FileNotFoundError(f"pending report not found: {name_or_path}")
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: resolve one pending report and print it as indented JSON.

    Always returns 0; an unresolved report surfaces as the exception raised by
    ``resolve_input``.
    """
    parser = argparse.ArgumentParser(description="Emit JSON for a pending owner report")
    parser.add_argument("report", help="Pending report path, filename, or report_id")
    opts = parser.parse_args()

    payload = parse_pending_report(resolve_input(opts.report))
    print(json.dumps(payload, ensure_ascii=False, indent=2))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
118
scripts/owner_report_driver.py
Normal file
118
scripts/owner_report_driver.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Minimal owner-report driver.
|
||||
|
||||
Consumes one pending owner report, calls an external send command, and only moves
|
||||
it to sent/ after the send command succeeds.
|
||||
|
||||
This is a deliberately small manual driver for debugging the owner-report chain.
|
||||
It does not watch directories, retry, or send anything by itself.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from owner_report_consumer import OWNER_REPORT_ROOT, PENDING_DIR, parse_pending_report, resolve_input
|
||||
|
||||
SENT_DIR = OWNER_REPORT_ROOT / "sent"
|
||||
|
||||
|
||||
def _build_send_env(payload: dict) -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
env.update(
|
||||
{
|
||||
"OWNER_REPORT_JSON": json.dumps(payload, ensure_ascii=False),
|
||||
"OWNER_REPORT_ID": str(payload.get("report_id") or ""),
|
||||
"OWNER_REPORT_TEAM": str(payload.get("team") or ""),
|
||||
"OWNER_REPORT_SOURCE": str(payload.get("source") or ""),
|
||||
"OWNER_REPORT_KIND": str(payload.get("report_kind") or "checkpoint"),
|
||||
"OWNER_REPORT_CREATED_AT": str(payload.get("created_at") or ""),
|
||||
"OWNER_REPORT_MESSAGE": str(payload.get("message") or ""),
|
||||
"OWNER_REPORT_PATH": str(payload.get("path") or ""),
|
||||
}
|
||||
)
|
||||
return env
|
||||
|
||||
|
||||
def _sent_path(src: Path) -> Path:
    """Return where ``src`` belongs inside ``SENT_DIR`` (same file name).

    Side effect: creates ``SENT_DIR``, including parents, if it is missing.
    """
    SENT_DIR.mkdir(parents=True, exist_ok=True)
    archived = SENT_DIR / src.name
    return archived
|
||||
|
||||
|
||||
def _finalize_successful_send(src: Path) -> dict[str, object]:
    """Archive a report after its send command succeeded.

    Moves ``src`` to its sent location. If ``src`` is already gone but the
    archived copy exists, that is reported as already archived.

    Returns:
        A dict with ``moved``, ``already_archived`` and ``final_path``.

    Raises:
        FileNotFoundError: if neither the pending nor the archived file exists.
    """
    dest = _sent_path(src)
    pending_present = src.exists()

    if not pending_present and not dest.exists():
        raise FileNotFoundError(
            f"successful send completed but pending report disappeared before archiving: pending={src} sent={dest}"
        )

    if pending_present:
        src.rename(dest)

    return {
        "moved": pending_present,
        "already_archived": not pending_present,
        "final_path": str(dest),
    }
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: send one pending owner report through an external command.

    The send command comes from ``--send-cmd`` or ``OWNER_REPORT_SEND_CMD`` and
    runs via ``bash -lc`` with the report exposed as ``OWNER_REPORT_*``
    environment variables. The report is archived only when the command exits
    with status 0. ``--dry-run`` prints the plan and touches nothing on disk.

    Returns:
        0 on success or dry run, otherwise the send command's exit code.

    Raises:
        SystemExit: if no send command is configured and this is not a dry run.
    """
    ap = argparse.ArgumentParser(description="Send one pending owner report via external command")
    ap.add_argument("report", help="Pending report path, filename, or report_id")
    ap.add_argument(
        "--send-cmd",
        help="Shell command used to send the report. Can also come from OWNER_REPORT_SEND_CMD.",
    )
    ap.add_argument("--dry-run", action="store_true", help="Print what would be sent and do not move files")
    args = ap.parse_args()

    src = resolve_input(args.report)
    payload = parse_pending_report(src)

    send_cmd = args.send_cmd or os.environ.get("OWNER_REPORT_SEND_CMD")
    if not send_cmd and not args.dry_run:
        raise SystemExit("missing send command: use --send-cmd or OWNER_REPORT_SEND_CMD")

    # Compute the archive location without creating the directory: reporting
    # the planned path (especially in a dry run) must not modify the filesystem.
    # The directory is created during archiving after a successful send.
    planned_sent_path = SENT_DIR / src.name

    if args.dry_run:
        print(json.dumps({
            "ok": True,
            "dry_run": True,
            "action": "would_send",
            "pending_path": str(src),
            "sent_path": str(planned_sent_path),
            "payload": payload,
            "send_cmd": send_cmd,
        }, ensure_ascii=False, indent=2))
        return 0

    proc = subprocess.run(
        ["bash", "-lc", send_cmd],
        text=True,
        capture_output=True,
        env=_build_send_env(payload),
    )

    result = {
        "ok": proc.returncode == 0,
        "dry_run": False,
        "pending_path": str(src),
        "sent_path": str(planned_sent_path),
        "send_cmd": send_cmd,
        "exit_code": proc.returncode,
        "stdout": proc.stdout,
        "stderr": proc.stderr,
        "payload": payload,
    }

    if proc.returncode != 0:
        # Leave the report pending so it can be retried.
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return proc.returncode

    result.update(_finalize_successful_send(src))
    print(json.dumps(result, ensure_ascii=False, indent=2))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
143
scripts/owner_report_producer.py
Normal file
143
scripts/owner_report_producer.py
Normal file
@@ -0,0 +1,143 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Minimal owner-report producer for ClawTeam-style worker checkpoints.
|
||||
|
||||
Writes ~/.clawteam/owner-reports/pending/<report_id>.md using explicit checkpoint
|
||||
fields and a human-readable message suitable for direct Telegram delivery.
|
||||
|
||||
This intentionally stays tiny:
|
||||
- no daemon
|
||||
- no event bus
|
||||
- no parser for arbitrary logs
|
||||
- just explicit fields in -> pending markdown out
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from owner_report_consumer import OWNER_REPORT_ROOT
|
||||
|
||||
PENDING_DIR = OWNER_REPORT_ROOT / "pending"
|
||||
|
||||
|
||||
def _slug(value: str) -> str:
|
||||
slug = re.sub(r"[^a-zA-Z0-9._-]+", "-", value.strip()).strip("-._")
|
||||
return slug or "report"
|
||||
|
||||
|
||||
def _now_iso() -> str:
|
||||
return datetime.now().astimezone().isoformat(timespec="seconds")
|
||||
|
||||
|
||||
def build_message(*, team: str, worker: str, task_id: str, progress: str, done: str, next_step: str, status: str, source: str | None, report_kind: str) -> str:
    """Compose the human-readable, newline-separated report message.

    Layout: a headline (``leader-final`` reports get their own headline without
    the worker name), the ``done`` text, an optional arrow line for a
    non-blank ``next_step``, and a compact ``task | status | progress
    [| source]`` line.
    """
    if report_kind == "leader-final":
        headline = f"✅ [{team}] final"
    else:
        headline = f"🔔 [{team}] {worker}"

    technical = [f"task={task_id}", f"status={status}", f"progress={progress}"]
    if source:
        technical.append(f"source={source}")

    parts = [headline, done]
    if next_step.strip():
        parts.append(f"→ {next_step}")
    parts.append(" | ".join(technical))
    return "\n".join(parts)
|
||||
|
||||
|
||||
def build_report_body(*, report_id: str, team: str, worker: str, task_id: str, progress: str, done: str, next_step: str, status: str, source: str | None, created_at: str, message: str, report_kind: str) -> str:
    """Render the pending-report file content as ``key: value`` lines.

    Fields are written in a fixed order, one per line, with a trailing newline.
    A field whose value is ``None`` is left out, and ``message`` is stored
    JSON-encoded.
    """
    ordered = {
        "report_id": report_id,
        "team": team,
        "worker": worker,
        "task_id": task_id,
        "progress": progress,
        "done": done,
        "next": next_step,
        "status": status,
        "report_kind": report_kind,
        "source": source,
        "created_at": created_at,
        "message": json.dumps(message, ensure_ascii=False),
    }
    rendered = [f"{key}: {val}" for key, val in ordered.items() if val is not None]
    return "\n".join(rendered) + "\n"
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry point: create one pending owner report from explicit fields.

    Builds the message and the file body from the command-line fields and
    writes ``<report_id>.md`` into ``PENDING_DIR``. With ``--dry-run`` nothing
    is written and the body is included in the printed JSON instead.

    Always returns 0.
    """
    parser = argparse.ArgumentParser(description="Create one pending owner report from explicit checkpoint fields")
    parser.add_argument("--team", required=True)
    parser.add_argument("--worker", required=True)
    parser.add_argument("--task-id", required=True)
    parser.add_argument("--progress", required=True)
    parser.add_argument("--done", required=True)
    parser.add_argument("--next", dest="next_step", required=True)
    parser.add_argument("--status", required=True)
    parser.add_argument("--source")
    parser.add_argument("--report-kind", choices=["checkpoint", "leader-final"], default="checkpoint")
    parser.add_argument("--report-id", help="Optional explicit report_id / filename stem")
    parser.add_argument("--created-at", default=_now_iso())
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()

    # Without an explicit id, derive one from team, worker, task and kind.
    if opts.report_id:
        report_id = opts.report_id
    else:
        report_id = f"{_slug(opts.team)}-{_slug(opts.worker)}-{_slug(opts.task_id)}-{_slug(opts.report_kind)}"

    # Fields shared by the message and the file body.
    shared = {
        "team": opts.team,
        "worker": opts.worker,
        "task_id": opts.task_id,
        "progress": opts.progress,
        "done": opts.done,
        "next_step": opts.next_step,
        "status": opts.status,
        "source": opts.source,
        "report_kind": opts.report_kind,
    }
    message = build_message(**shared)
    body = build_report_body(
        report_id=report_id,
        created_at=opts.created_at,
        message=message,
        **shared,
    )

    path = PENDING_DIR / f"{report_id}.md"
    result = {
        "ok": True,
        "report_id": report_id,
        "path": str(path),
        "message": message,
        "dry_run": opts.dry_run,
    }

    if opts.dry_run:
        result["body"] = body
    else:
        PENDING_DIR.mkdir(parents=True, exist_ok=True)
        path.write_text(body, encoding="utf-8")

    print(json.dumps(result, ensure_ascii=False, indent=2))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
141
scripts/run_watchdog_b.sh
Executable file
141
scripts/run_watchdog_b.sh
Executable file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Watchdog B v2 dispatcher/runner.
|
||||
# Unified entrypoint for timer/service/manual runs.
|
||||
#
|
||||
# Flow:
|
||||
# 1) Call check_openclaw_state.sh to get one of: running | stalled | idle
|
||||
# 2) Emit a human-readable action template for the detected state
|
||||
# 3) Invoke the notification layer (dry-run/manual by default, configurable)
|
||||
# 4) Persist rendered output for local verification / future integrations
|
||||
#
|
||||
# Notification behavior is intentionally conservative:
|
||||
# - running: defaults to a manual/queue-ready owner report path
|
||||
# - stalled/idle: nudge main agent first, then optionally escalate to owner report
|
||||
# - outbound owner messaging reuses the existing owner-reporting-system queue
|
||||
|
||||
# --- configuration -----------------------------------------------------------
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
SKILL_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"
WATCHDOG_B_CONFIG_FILE_DEFAULT="$HOME/.config/openclaw/watchdog-b.env"
WATCHDOG_B_CONFIG_FILE="${WATCHDOG_B_CONFIG_FILE:-$WATCHDOG_B_CONFIG_FILE_DEFAULT}"
if [[ -f "$WATCHDOG_B_CONFIG_FILE" ]]; then
  # set -a exports every variable assigned while the config file is sourced,
  # so child processes (checker, notifier) see the same settings.
  set -a
  # shellcheck disable=SC1090
  . "$WATCHDOG_B_CONFIG_FILE"
  set +a
fi

WORKSPACE_DEFAULT="$HOME/.openclaw/workspace"
WORKSPACE_DIR="${WATCHDOG_B_WORKSPACE:-$WORKSPACE_DEFAULT}"
CHECKER="${WATCHDOG_B_CHECKER:-$SCRIPT_DIR/check_openclaw_state.sh}"
ARTIFACT_DIR="${WATCHDOG_B_ARTIFACT_DIR:-$WORKSPACE_DIR/state/watchdog-b}"
TIMESTAMP="$(date '+%Y-%m-%dT%H:%M:%S%z')"
HOSTNAME_VALUE="$(hostname 2>/dev/null || echo unknown-host)"
NOTIFIER="${WATCHDOG_B_NOTIFIER:-$SCRIPT_DIR/notify_watchdog_b.py}"
NOTIFY_DRY_RUN="${WATCHDOG_B_NOTIFY_DRY_RUN:-1}"

mkdir -p "$ARTIFACT_DIR"

if [[ ! -x "$CHECKER" ]]; then
  echo "watchdog-b error: checker not executable: $CHECKER" >&2
  exit 1
fi

# Run the checker quoted, so a path containing spaces is not word-split, and
# report a failure explicitly instead of exiting silently under `set -e`.
# The checker's exit status is passed through.
checker_rc=0
STATE="$("$CHECKER")" || checker_rc=$?
if (( checker_rc != 0 )); then
  echo "watchdog-b error: checker failed (exit $checker_rc): $CHECKER" >&2
  exit "$checker_rc"
fi
|
||||
|
||||
# Print the key/value header and message template for the "running" state.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_running() {
  printf '%s\n' \
    'WATCHDOG_B_STATE=running' \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    'WATCHDOG_B_ACTION=progress-template' \
    'WATCHDOG_B_TEMPLATE_BEGIN' \
    '[watchdog-b][running] OpenClaw main runtime appears active.' \
    'Suggested future progress-report message template:' \
    '- Status: still running' \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    '- Summary: main runtime is alive and log activity is fresh.' \
    '- Next step: if desired, attach latest task/progress snapshot before sending.' \
    'WATCHDOG_B_TEMPLATE_END' \
    'WATCHDOG_B_NEXT_HOOK=progress_report_stub'
}
|
||||
|
||||
# Print the key/value header and message template for the "stalled" state.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_stalled() {
  printf '%s\n' \
    'WATCHDOG_B_STATE=stalled' \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    'WATCHDOG_B_ACTION=nudge-template' \
    'WATCHDOG_B_TEMPLATE_BEGIN' \
    '[watchdog-b][stalled] OpenClaw main runtime looks alive but may be stuck.' \
    'Suggested future nudge/escalation template:' \
    '- Audience: main agent and/or Eric' \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    '- Observation: process is alive, but activity log appears stale beyond threshold.' \
    '- Suggested ask: please confirm current task state, unblock reason, or whether intervention is needed.' \
    'WATCHDOG_B_TEMPLATE_END' \
    'WATCHDOG_B_NEXT_HOOK=stalled_nudge_stub'
}
|
||||
|
||||
# Print the key/value header and message template for the "idle" state.
# Globals: TIMESTAMP (read), HOSTNAME_VALUE (read)
# Outputs: 14 lines on stdout
emit_idle() {
  printf '%s\n' \
    'WATCHDOG_B_STATE=idle' \
    "WATCHDOG_B_TIMESTAMP=$TIMESTAMP" \
    "WATCHDOG_B_HOST=$HOSTNAME_VALUE" \
    'WATCHDOG_B_ACTION=idle-template' \
    'WATCHDOG_B_TEMPLATE_BEGIN' \
    '[watchdog-b][idle] OpenClaw main runtime does not appear to be actively running.' \
    'Suggested future reminder template:' \
    '- Audience: main agent and/or Eric' \
    "- Checked at: $TIMESTAMP" \
    "- Host: $HOSTNAME_VALUE" \
    '- Observation: no live runtime detected from pid/log heuristic.' \
    '- Suggested ask: confirm whether the runtime should be started, ignored, or left idle.' \
    'WATCHDOG_B_TEMPLATE_END' \
    'WATCHDOG_B_NEXT_HOOK=idle_reminder_stub'
}
|
||||
|
||||
# --- render the template for the detected state ------------------------------
case "$STATE" in
  running | stalled | idle)
    # Each valid state has a matching emit_<state> function.
    OUTPUT="$("emit_$STATE")"
    ;;
  *)
    echo "watchdog-b error: unexpected state from checker: $STATE" >&2
    exit 2
    ;;
esac

printf '%s\n' "$OUTPUT"

# --- notification layer (a notifier failure is only a warning) ---------------
NOTIFY_OUTPUT=""
if [[ ! -x "$NOTIFIER" ]]; then
  echo "watchdog-b warning: notifier not executable: $NOTIFIER" >&2
else
  NOTIFY_CMD=("$NOTIFIER" --state "$STATE" --timestamp "$TIMESTAMP")
  if [[ "$NOTIFY_DRY_RUN" == "1" ]]; then
    NOTIFY_CMD+=(--dry-run)
  fi
  notify_rc=0
  NOTIFY_OUTPUT="$(WATCHDOG_B_ARTIFACT_DIR="$ARTIFACT_DIR" "${NOTIFY_CMD[@]}" 2>&1)" || notify_rc=$?
  # The captured notifier output is echoed whether or not it succeeded.
  printf '%s\n' "$NOTIFY_OUTPUT"
  if (( notify_rc != 0 )); then
    echo "watchdog-b warning: notifier returned non-zero for state=$STATE" >&2
  fi
fi

# --- persist artifacts for local verification --------------------------------
printf '%s\n' "$OUTPUT" > "$ARTIFACT_DIR/last-output.txt"
printf '%s\n' "$NOTIFY_OUTPUT" > "$ARTIFACT_DIR/last-notify-output.txt"
printf '%s\t%s\n' "$TIMESTAMP" "$STATE" >> "$ARTIFACT_DIR/history.tsv"
printf '%s\n' "$STATE" > "$ARTIFACT_DIR/last-state.txt"
|
||||
65
scripts/verify_watchdog_b_e2e.sh
Executable file
65
scripts/verify_watchdog_b_e2e.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env bash
# End-to-end check for watchdog-b: runs the runner against a fake pid/log
# fixture with dry-run disabled, then verifies that the enqueued owner report
# ended up under ~/.clawteam/owner-reports/sent/.
# Optional env: WATCHDOG_B_VERIFY_ROOT (artifact root), RUN_ID (run directory name).
set -euo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# The workspace root is taken to be two levels above this script's directory.
WORKSPACE="$(cd -- "$SCRIPT_DIR/../.." && pwd)"
ARTIFACT_ROOT="${WATCHDOG_B_VERIFY_ROOT:-$WORKSPACE/state/watchdog-b-verify-e2e}"
RUN_ID="${RUN_ID:-$(date +%Y%m%dT%H%M%S)}"
RUN_DIR="$ARTIFACT_ROOT/$RUN_ID"
FIXTURE_DIR="$RUN_DIR/fixture"
LOG="$RUN_DIR/verify.log"
STATE_DIR="$RUN_DIR/state"
QUEUE_SNAPSHOT="$RUN_DIR/queue-before.txt"
QUEUE_AFTER="$RUN_DIR/queue-after.txt"
mkdir -p "$FIXTURE_DIR/host-runtime" "$FIXTURE_DIR/logs" "$STATE_DIR" "$RUN_DIR"

# From here on, stdout and stderr go to the terminal and are appended to $LOG.
exec > >(tee -a "$LOG") 2>&1

echo "[verify] run_id=$RUN_ID"
echo "[verify] workspace=$WORKSPACE"
date -Iseconds

echo "[verify] snapshot owner-report queue before"
# "|| true": a missing queue directory must not abort the run.
find "$HOME/.clawteam/owner-reports" -maxdepth 2 -type f | sort > "$QUEUE_SNAPSHOT" || true

# Fixture: a live background process whose pid is written to the pid file,
# plus a freshly touched log file.
sleep 180 &
FAKE_PID=$!
trap 'kill "$FAKE_PID" 2>/dev/null || true' EXIT
printf '%s\n' "$FAKE_PID" > "$FIXTURE_DIR/host-runtime/openclaw.pid"
touch "$FIXTURE_DIR/logs/openclaw.log"

echo "[verify] run watchdog-b direct E2E (enqueue + direct delivery)"
# Per-run overrides for the runner: fixture pid/log paths, a private artifact
# directory, dry-run off, and no minimum interval between running reports.
OPENCLAW_PID_FILE="$FIXTURE_DIR/host-runtime/openclaw.pid" \
OPENCLAW_LOG_FILE="$FIXTURE_DIR/logs/openclaw.log" \
STALL_AFTER_SECONDS=1200 \
WATCHDOG_B_ARTIFACT_DIR="$STATE_DIR" \
WATCHDOG_B_NOTIFY_DRY_RUN=0 \
WATCHDOG_B_RUNNING_REPORT_MODE=enqueue-and-drain \
WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS=0 \
"$WORKSPACE/scripts/watchdog-b/run_watchdog_b.sh" | tee "$RUN_DIR/run-output.txt"

echo "[verify] snapshot owner-report queue after"
find "$HOME/.clawteam/owner-reports" -maxdepth 2 -type f | sort > "$QUEUE_AFTER" || true

echo "[verify] summarize"
# Read the report_id recorded for the "running" event in the notifier state file.
REPORT_ID="$(python3 - <<'PY' "$STATE_DIR/notify-state.json"
import json,sys
p=sys.argv[1]
with open(p,'r',encoding='utf-8') as f:
    data=json.load(f)
print(data['events']['running']['last_result']['enqueue']['report_id'])
PY
)"

echo "REPORT_ID=$REPORT_ID" | tee "$RUN_DIR/result.env"
SENT_PATH="$HOME/.clawteam/owner-reports/sent/$REPORT_ID.md"
echo "SENT_PATH=$SENT_PATH" | tee -a "$RUN_DIR/result.env"
# Pass criterion: the report file exists in the sent/ directory.
if [[ ! -f "$SENT_PATH" ]]; then
  echo "[verify] ERROR: sent file missing: $SENT_PATH" >&2
  exit 1
fi

echo "[verify] sent file found"
sed -n '1,120p' "$SENT_PATH" | tee "$RUN_DIR/sent-head.txt"

echo "[verify] done"
|
||||
40
scripts/watchdog-b.env.example
Normal file
40
scripts/watchdog-b.env.example
Normal file
@@ -0,0 +1,40 @@
|
||||
# Single source of truth for watchdog-b owner-facing policy.
|
||||
# Preferred location: ~/.config/openclaw/watchdog-b.env
|
||||
# Can also be loaded manually by:
|
||||
# WATCHDOG_B_CONFIG_FILE=... ./scripts/watchdog-b/run_watchdog_b.sh
|
||||
# WATCHDOG_B_CONFIG_FILE=... ./scripts/watchdog-b/notify_watchdog_b.py --state running
|
||||
|
||||
# --- delivery / runtime policy ---
|
||||
WATCHDOG_B_NOTIFY_DRY_RUN=0
|
||||
WATCHDOG_B_RUNNING_REPORT_MODE=enqueue-and-drain
|
||||
WATCHDOG_B_RUNNING_REPORT_MIN_INTERVAL_SECONDS=3600
|
||||
WATCHDOG_B_OWNER_DELIVERY_MODE=direct-discord
|
||||
WATCHDOG_B_OWNER_REPORT_CHANNEL=discord
|
||||
WATCHDOG_B_OWNER_REPORT_TARGET=channel:REPLACE_ME
|
||||
|
||||
# --- non-running escalation policy ---
|
||||
# Set this only if the host actually has a valid OpenClaw agent id to nudge.
|
||||
# If left unset, stalled/idle paths skip main-agent nudge and can still escalate owner-facing reports.
|
||||
# WATCHDOG_B_MAIN_AGENT_ID=main
|
||||
# WATCHDOG_B_STALLED_OWNER_MODE=escalate
|
||||
# WATCHDOG_B_IDLE_OWNER_MODE=escalate
|
||||
# WATCHDOG_B_STALLED_OWNER_ESCALATION_AFTER=2
|
||||
# WATCHDOG_B_IDLE_OWNER_ESCALATION_AFTER=2
|
||||
# WATCHDOG_B_STALLED_NUDGE_MIN_INTERVAL_SECONDS=900
|
||||
# WATCHDOG_B_IDLE_NUDGE_MIN_INTERVAL_SECONDS=1800
|
||||
|
||||
# --- owner-facing message style ---
|
||||
WATCHDOG_B_RUNNING_EMOJI=✅
|
||||
WATCHDOG_B_RUNNING_SUMMARY=主程序仍在運行
|
||||
WATCHDOG_B_STALLED_EMOJI=⚠️
|
||||
WATCHDOG_B_STALLED_SUMMARY=主程序疑似卡住
|
||||
WATCHDOG_B_IDLE_EMOJI=🛑
|
||||
WATCHDOG_B_IDLE_SUMMARY=主程序目前未運行
|
||||
|
||||
# Optional overrides for the compact technical line.
|
||||
# WATCHDOG_B_RUNNING_PROGRESS_LABEL=running
|
||||
# WATCHDOG_B_STALLED_PROGRESS_LABEL=stalled
|
||||
# WATCHDOG_B_IDLE_PROGRESS_LABEL=idle
|
||||
# WATCHDOG_B_RUNNING_STATUS=normal
|
||||
# WATCHDOG_B_STALLED_STATUS=needs-attention
|
||||
# WATCHDOG_B_IDLE_STATUS=needs-attention
|
||||
Reference in New Issue
Block a user