From 34f035cfb5cb7b6b1aee12e3fbd14587595e95bc Mon Sep 17 00:00:00 2001 From: Eve Date: Fri, 8 May 2026 10:56:30 +0800 Subject: [PATCH] feat(reporting-governance): add minimal runtime integrated slice --- .../agent-reporting-governance-plugin.md | 243 +++++------------- plugins/reporting-governance/README.md | 12 +- plugins/reporting-governance/package.json | 2 +- .../reporting-governance/src/core/index.mjs | 1 + .../src/core/runtime-integrated.mjs | 52 ++++ plugins/reporting-governance/src/index.mjs | 1 + .../exports-boundary.integration.test.mjs | 60 +++-- .../runtime-integrated.integration.test.mjs | 207 +++++++++++++++ 8 files changed, 363 insertions(+), 215 deletions(-) create mode 100644 plugins/reporting-governance/src/core/runtime-integrated.mjs create mode 100644 plugins/reporting-governance/test/runtime-integrated.integration.test.mjs diff --git a/docs/architecture/agent-reporting-governance-plugin.md b/docs/architecture/agent-reporting-governance-plugin.md index 5a0cf7e..ee89c84 100644 --- a/docs/architecture/agent-reporting-governance-plugin.md +++ b/docs/architecture/agent-reporting-governance-plugin.md @@ -1,215 +1,68 @@ # Agent Reporting Governance Plugin -## Product Definition +## Why this exists -### Problem statement +This plugin exists to turn reporting norms into enforceable, auditable behavior across agent runtimes. -Multi-agent systems fail in predictable reporting ways long before they fail technically. A task may be dispatched correctly, work may even be progressing, yet the operator still loses control because reporting is missing, delayed, misleading, or unverifiable. +The core problem is not only whether work happened, but whether reporting about the work was: -Today, many of these expectations live only in prompts, habits, or reviewer discipline. That is not enough. Prompt-only norms are easy to forget, easy to bypass, hard to audit, and inconsistent across personas, workspaces, runtimes, and machines. The result is the same class of operational failure repeating in different forms: +- timely +- truthful +- operator-visible when required +- backed by inspectable evidence +- portable across runtime boundaries -- no report -- late report -- fake progress -- forgotten checkpoint -- unverified completion claims -- subagent result not forwarded -- placeholder or proxy reports that are presented as if they were final, direct, or verified +A prompt can encourage this behavior, but prompts alone cannot reliably guarantee it. -The Agent Reporting Governance Plugin exists to turn reporting from a soft expectation into an enforceable, portable product capability. - -### Target users - -This plugin is for teams and operators who rely on agents to do real work and need trustworthy operational visibility, especially: - -- primary operators supervising one or more agents -- people running subagent-based workflows where completion can be lost between child and main sessions -- workspace owners who need common reporting rules across multiple projects -- platform builders who want reporting controls to survive prompt changes and runtime differences -- reviewers and auditors who need evidence about what was actually reported, when, and with what verification status - -### Core outcomes - -The product should produce a small set of high-value outcomes: - -1. **Reporting becomes governable** - Reporting requirements are expressed as policy and enforcement, not just advice in prompts. - -2. **Operators can distinguish real progress from narrative progress** - The system should make it harder for an agent to appear active without providing meaningful status evidence. - -3. **Critical workflow handoffs become visible** - Especially for subagent orchestration, the system should surface when a child result exists but was not forwarded through the required reporting path. - -4. **Completion claims become policy-aware** - A task should not be treated as cleanly complete when required checkpoints, evidence, or verification steps are missing. - -5. **Placeholder/proxy reporting stays honest** - If an agent is reporting a guess, summary, relay, or provisional state rather than a verified direct result, that status must be explicitly labeled. - -6. **Reporting rules become reusable across environments** - The same governance model should be deployable across different machines, workspaces, and runtime implementations. - -### Why prompt-only rules are insufficient - -Prompt instructions are necessary, but they are not a durable control surface for governance. - -Prompt-only rules are insufficient because: - -- **they are not reliably enforced** — an agent can ignore, forget, or partially follow them -- **they are not portable** — behavior changes when the persona, system prompt, workspace conventions, or wrapper prompt changes -- **they are hard to audit** — operators cannot easily inspect which reporting obligations were active and whether they were satisfied -- **they break under orchestration** — subagent dispatch, forwarded results, checkpoints, and verification claims need machine-checkable state, not only natural language reminders -- **they invite ambiguity** — an agent can produce language that sounds compliant without actually meeting reporting requirements -- **they are weak against operational drift** — over time, informal rules become inconsistently applied across repos, sessions, and machines +## Architectural stance This product therefore treats prompts as guidance, but not as the sole enforcement mechanism. Governance must be embodied in plugin logic, policy artifacts, adapters, receipts, and verifiable state transitions. -## Product scope +The plugin boundary is deliberately split into: -The Agent Reporting Governance Plugin defines and enforces reporting obligations for agent workflows. In MVP and beyond, its scope is specifically about governing reporting quality, timing, and truthfulness — not about replacing all workflow logic. +- `core/` for runtime-agnostic governance meaning +- `adapters/` for runtime-facing execution seams +- `storage/` for durable artifact I/O contracts +- package artifacts for deployable truth such as profiles and capability descriptors +- reference compositions for concrete runtime wiring -### In scope +## Package artifact posture -The plugin governs whether required report states and transitions are present, missing, late, misleading, or improperly represented. +The plugin should treat policy packs and deployment profiles as versioned artifacts. -It must explicitly govern these cases: +That means: -- **No report**: required status or completion reporting never happened -- **Late report**: a required report arrived after the allowed checkpoint or reporting window -- **Fake progress**: the agent reported activity or confidence without enough grounding evidence, concrete advancement, or truthful status framing -- **Forgotten checkpoint**: an expected intermediate report or review checkpoint was skipped -- **Unverified completion claims**: the agent claimed a task was done, fixed, passing, or otherwise complete without the required verification status or evidence -- **Subagent result not forwarded**: a child/subagent result existed or completed, but the required result was not forwarded into the main reporting path -- **Placeholder/proxy reports**: relayed, guessed, summarized, pending, or surrogate reports are allowed only if they are explicitly labeled as placeholders, proxies, or otherwise non-final/non-direct - -### Functional scope - -The product should support policy-driven handling for questions such as: - -- what kinds of reports are required for a task type or workflow stage -- when a checkpoint becomes overdue -- what evidence is required before a completion claim is considered valid -- how a forwarded subagent result is distinguished from a missing or silently dropped result -- how provisional, relayed, or second-hand reporting must be labeled -- what enforcement action should occur when governance rules are violated - -### Enforcement scope - -The plugin is intended to provide reusable governance primitives such as: - -- reporting policy evaluation -- checkpoint and lateness evaluation -- completion-claim verification gates -- forwarding/relay integrity checks for subagent workflows -- labeling requirements for placeholder or proxy reports -- structured outputs that adapters can use to block, warn, escalate, annotate, or require correction - -### Portability requirements - -Portability is a first-class product requirement, not a future nice-to-have. - -The plugin must: - -- **not depend on one persona, one prompt, or one workspace** -- **work across machines** -- **support multiple runtimes through adapters** -- **treat policy packs and deployment profiles as versioned artifacts** - -This means the product definition assumes: - -- reporting governance rules live outside any single prompt personality -- enforcement logic can be installed in different workspaces without rewriting the core policy model -- adapter layers translate runtime-specific events into a common governance model - policy packs can be pinned, reviewed, diffed, promoted, and rolled back like other versioned operational artifacts -- deployment profiles can express environment-specific wiring without forking the product definition itself +- deployment profiles should evolve from external prose/YAML into package-owned deployable artifacts +- capability descriptors should honestly declare what a runtime can and cannot do -## Non-goals +This is essential because runtime truth cannot remain implicit. -To keep the product sharp, the following are explicitly out of scope for this plugin: +## Runtime truth model -- building a general-purpose workflow engine for every agent behavior -- deciding whether a task is product-correct or technically correct in all domains -- replacing human judgment for approval, acceptance, or managerial sign-off -- guaranteeing that an agent never lies; the product instead focuses on making false or unsupported reporting detectable and governable -- prescribing one universal UX, one runtime, one transport, or one storage backend -- coupling governance to one vendor model, one OpenClaw persona, or one repository layout -- forcing all workflows to use subagents; the plugin must still help in single-agent flows, but subagent forwarding integrity is a key governed case +A runtime is not trustworthy because it says it sent a notice. +It is trustworthy when it can preserve state transitions and artifacts that prove what happened. -## Product boundaries and design stance +For the current OpenClaw reference path, that means durable visibility across: -This plugin should be understood as governance infrastructure for reporting behavior. +- watchdog evidence artifacts +- canonical event artifacts +- queue items +- spool / handoff artifacts +- bridge receipts +- sender attempt artifacts +- final ack / blocked / pending delivery state -It is not merely a documentation bundle and not merely a prompt patch. It is a product layer that sits between workflow events and operator trust. Its job is to convert ambiguous reporting norms into explicit, inspectable policy outcomes. +## Reference runtime composition -The design stance is: - -- **policy over folklore** -- **evidence over vibes** -- **portable adapters over workspace-specific hacks** -- **explicit labels over ambiguous proxy language** -- **governed completion over self-declared completion** - -## Current architecture trajectory - -The product definition is now backed by a concrete runtime-integration direction: - -- canonical governance contracts are defined in the event, evidence, decision, and policy-pack specs -- runtime portability is carried by an explicit adapter interface -- the completed watchdog auto-notify chain is treated as the first reference adapter composition rather than as standalone repair glue - -In practice, the current reference composition is: +The current mainline reference composition is the watchdog chain: ```text -watchdog runner - -> canonical event - -> operator notification queue - -> dispatcher spool handoff - -> bridge supervisor - -> sender binding - -> acked | blocked | pending_external_send receipt +watchdog -> queue -> dispatcher -> bridge -> sender binding -> acked|blocked|pending_external_send ``` -This matters architecturally because it proves the plugin can: - -- turn elapsed-time governance failure into canonical machine-readable events -- route required operator-visible follow-up across runtime boundaries -- preserve honest delivery semantics instead of collapsing `dispatched` into false success -- provide a migration path from repo scripts toward package-level adapters and deployment profiles - -## Compatibility envelope and legacy compatibility mode - -The architecture now draws a hard line between two caller postures: - -### Compatibility envelope - -A caller is inside the compatibility envelope once it supplies either: - -- a deployment profile / profile artifact, or -- a package version pin - -Inside this envelope, runtime compatibility is enforced against the descriptor as a truth contract: - -- canonical schema paths must match -- requested plugin version must be declared compatible -- required capability expectations must be satisfied -- requested actions must be supportable or honestly degraded - -If not, the system fails closed before producing a runnable enforcement contract. - -### Legacy compatibility mode - -Legacy compatibility mode exists only so older callers that still invoke package core without profile/package metadata do not break immediately. - -Behavior in this mode: - -- no version pin is assumed -- schema mismatch is surfaced in `schema_checks` but does not hard-fail by itself -- preflight records migration debt as notes -- truth semantics for actual planning are still preserved - -This is a migration concession, not a long-term steady state. -New callers should move to profile/package-backed invocation. +This is not just repo glue anymore. +It is the first real reference runtime composition that the package architecture is being shaped around. ## Minimal package profile artifact trajectory @@ -228,10 +81,30 @@ Architectural meaning: - runtime binding can be derived from the artifact rather than hardcoded entirely in docs - tests prove the artifact resolves into concrete script and runtime-artifact paths - `artifact_roots` enforcement is now two-layered for this slice: lexical boundary rejection plus realpath-level symlink escape rejection +- specifically for `queueItems`, the path is checked twice: once at profile-artifact load/validation time, and again at orchestrator use-time before runtime consumption This is intentionally still a **minimal verifiable slice**, not the full deployment system. It proves the package boundary can own profile artifacts and bind them into runtime execution inputs. +## Minimal runtime-integrated trajectory + +The next notch is no longer documentation-only binding exposure. +There is now a minimal runtime-integrated path that connects: + +- `executeGovernanceContract(...)` +- validated profile artifact / `deploymentBinding` +- orchestrator adapter runtime invocation + +Meaning: + +- core still evaluates and plans first +- runtime execution is attempted only when the contract truthfully requires adapter action such as `notify_operator` +- the same validated deployment binding is reused as the adapter input instead of re-declaring runtime wiring out of band +- tests prove this path produces real queue + receipt side effects under dry-run orchestration + +This is still intentionally not full inline interception. +It is the smallest verifiable seam that shows planning and runtime composition are genuinely wired. + Primary follow-on specs: - `docs/specs/reporting-governance-adapter-interface.md` diff --git a/plugins/reporting-governance/README.md b/plugins/reporting-governance/README.md index e673ac6..63034bf 100644 --- a/plugins/reporting-governance/README.md +++ b/plugins/reporting-governance/README.md @@ -11,6 +11,7 @@ Current purpose: - provide a minimal package-level policy evaluator and decision runner skeleton that can be verified in isolation - add one minimal package-owned deployment profile artifact / loader / binding contract slice that is executable in tests - let profile artifacts drive one real orchestrator adapter entrypoint instead of staying test-only +- add one minimal runtime-integrated slice wiring contract planning into real orchestrator execution ## Package skeleton @@ -28,6 +29,7 @@ plugins/reporting-governance/ policy-evaluator.mjs decision-runner.mjs execute-governance-contract.mjs + runtime-integrated.mjs adapters/ storage/ reference/ @@ -92,6 +94,7 @@ What is currently exposed from the root export: - `evaluatePolicies(...)` - `planDecisionExecution(...)` - `executeGovernanceContract(...)` +- `executeRuntimeIntegratedGovernance(...)` - package metadata helpers such as `packageName` - package-owned adapter entrypoints and `runWatchdogChain(...)` @@ -161,6 +164,7 @@ What this slice does: 5. validator rejects `artifact_roots` absolute paths, lexical escapes, and symlink escapes that resolve outside repo realpath boundary 6. adapter runtime binding can be instantiated from that contract in tests 7. orchestrator adapter can now bootstrap from package profile artifact input directly +8. `queueItems` now has two checks: load-time artifact validation and orchestrator use-time realpath recheck before runtime consumption What this slice does **not** claim yet: @@ -178,6 +182,7 @@ The current package now includes a small but runnable `core/` implementation: - `src/core/policy-evaluator.mjs` - `src/core/decision-runner.mjs` - `src/core/execute-governance-contract.mjs` +- `src/core/runtime-integrated.mjs` - `src/core/index.mjs` Current package-core responsibilities: @@ -190,6 +195,7 @@ Current package-core responsibilities: - truthfully degrade unsupported enforcement paths based on the capability descriptor - provide one minimal contract path from `capability descriptor -> policy decision -> execution planning` - surface deployment binding metadata when caller passes a validated profile artifact +- optionally hand that deployment binding into the orchestrator adapter when caller explicitly supplies runtime execution inputs Still **runtime-adapter responsibility** at this stage: @@ -209,12 +215,14 @@ This slice now has one small but testable contract path: 2. policy evaluator emits a canonical decision from event/evidence/context 3. decision runner converts that decision into execution planning 4. validated profile artifact can supply deployment binding metadata -5. orchestrator adapter can consume profile artifact bindings and run one real runtime layer -6. the result declares: +5. runtime-integrated helper can take that binding and route it into the orchestrator adapter +6. orchestrator adapter consumes the same binding and runs one real runtime layer +7. the result declares: - adapter-dispatch actions required - package-core actions possible locally - blocked mandatory actions when capability support is missing - truthful delivery / receipt state + - runtime execution result when explicitly requested This is intentionally **planning-level end-to-end plus one adapter bootstrap layer**, not full live inline interception. It proves contract alignment without pretending all runtime enforcement is already extracted. diff --git a/plugins/reporting-governance/package.json b/plugins/reporting-governance/package.json index ffc4522..adbe27b 100644 --- a/plugins/reporting-governance/package.json +++ b/plugins/reporting-governance/package.json @@ -14,6 +14,6 @@ "./adapters/orchestrator": "./src/adapters/orchestrator.mjs" }, "scripts": { - "test": "node --test test/package-structure.test.mjs test/policy-evaluator.test.mjs test/compatibility-preflight.test.mjs test/profile-artifact.test.mjs test/decision-runner.test.mjs test/governance-contract.integration.test.mjs test/watchdog-chain.integration.test.mjs test/exports-boundary.integration.test.mjs" + "test": "node --test test/package-structure.test.mjs test/policy-evaluator.test.mjs test/compatibility-preflight.test.mjs test/profile-artifact.test.mjs test/decision-runner.test.mjs test/governance-contract.integration.test.mjs test/watchdog-chain.integration.test.mjs test/runtime-integrated.integration.test.mjs test/exports-boundary.integration.test.mjs" } } diff --git a/plugins/reporting-governance/src/core/index.mjs b/plugins/reporting-governance/src/core/index.mjs index e341a6f..010143c 100644 --- a/plugins/reporting-governance/src/core/index.mjs +++ b/plugins/reporting-governance/src/core/index.mjs @@ -1,4 +1,5 @@ export { evaluatePolicyPack, evaluatePolicies } from './policy-evaluator.mjs'; export { planDecisionExecution } from './decision-runner.mjs'; export { executeGovernanceContract } from './execute-governance-contract.mjs'; +export { executeRuntimeIntegratedGovernance } from './runtime-integrated.mjs'; export { runCompatibilityPreflight } from './compatibility-preflight.mjs'; diff --git a/plugins/reporting-governance/src/core/runtime-integrated.mjs b/plugins/reporting-governance/src/core/runtime-integrated.mjs new file mode 100644 index 0000000..e4cbbb7 --- /dev/null +++ b/plugins/reporting-governance/src/core/runtime-integrated.mjs @@ -0,0 +1,52 @@ +import { executeGovernanceContract } from './execute-governance-contract.mjs'; +import { runOrchestratorAdapter } from '../adapters/orchestrator.mjs'; + +export function executeRuntimeIntegratedGovernance({ + event, + evidence = [], + capabilityDescriptor = {}, + policyPacks = [], + context = {}, + profile = {}, + packageVersion, + repoRootOverride, + runtime = null, +} = {}) { + const governance = executeGovernanceContract({ + event, + evidence, + capabilityDescriptor, + policyPacks, + context, + profile, + packageVersion, + repoRootOverride, + }); + + const shouldRunOrchestrator = Boolean( + runtime + && governance.preflight?.status === 'pass' + && governance.deploymentBinding + && governance.contract?.adapter_actions?.includes('notify_operator') + ); + + const runtimeExecution = shouldRunOrchestrator + ? runOrchestratorAdapter({ + profileArtifact: profile, + repoRootOverride, + ...runtime, + }) + : null; + + return { + ...governance, + runtimeExecution, + runtimeIntegration: { + attempted: shouldRunOrchestrator, + adapter: shouldRunOrchestrator ? 'orchestrator' : null, + reason: shouldRunOrchestrator + ? 'deployment binding + notify_operator adapter action routed into orchestrator adapter' + : 'runtime execution not attempted', + }, + }; +} diff --git a/plugins/reporting-governance/src/index.mjs b/plugins/reporting-governance/src/index.mjs index 4bfa7c5..79892ef 100644 --- a/plugins/reporting-governance/src/index.mjs +++ b/plugins/reporting-governance/src/index.mjs @@ -34,6 +34,7 @@ export { evaluatePolicies, planDecisionExecution, executeGovernanceContract, + executeRuntimeIntegratedGovernance, runCompatibilityPreflight, } from './core/index.mjs'; export { diff --git a/plugins/reporting-governance/test/exports-boundary.integration.test.mjs b/plugins/reporting-governance/test/exports-boundary.integration.test.mjs index 8afad97..d6db08c 100644 --- a/plugins/reporting-governance/test/exports-boundary.integration.test.mjs +++ b/plugins/reporting-governance/test/exports-boundary.integration.test.mjs @@ -69,6 +69,7 @@ test('package root export resolves public package surface only', () => { hasRunWatchdogChain: typeof plugin.runWatchdogChain, hasPlanDecisionExecution: typeof plugin.planDecisionExecution, hasExecuteGovernanceContract: typeof plugin.executeGovernanceContract, + hasExecuteRuntimeIntegratedGovernance: typeof plugin.executeRuntimeIntegratedGovernance, })); `); @@ -76,6 +77,7 @@ test('package root export resolves public package surface only', () => { assert.equal(result.hasRunWatchdogChain, 'function'); assert.equal(result.hasPlanDecisionExecution, 'function'); assert.equal(result.hasExecuteGovernanceContract, 'function'); + assert.equal(result.hasExecuteRuntimeIntegratedGovernance, 'function'); } finally { fs.rmSync(root, { recursive: true, force: true }); } @@ -141,43 +143,47 @@ test('leaf subpath export resolves and can execute through injected runtime bind const root = createFixtureRoot(); try { installPackageAlias(root); - fs.mkdirSync(path.join(root, 'scripts'), { recursive: true }); - fs.mkdirSync(path.join(root, 'events'), { recursive: true }); - fs.mkdirSync(path.join(root, 'evidence'), { recursive: true }); - fs.mkdirSync(path.join(root, 'queue'), { recursive: true }); const statePath = writeState(root); - - const stubScriptPath = path.join(root, 'scripts', 'custom-watchdog.mjs'); - fs.writeFileSync(stubScriptPath, ` - process.stdout.write(JSON.stringify({ - ok: true, - source: 'stub-watchdog', - argv: process.argv.slice(2), - })); - `, 'utf8'); + fs.mkdirSync(path.join(root, 'evidence'), { recursive: true }); + fs.mkdirSync(path.join(root, 'events'), { recursive: true }); + fs.mkdirSync(path.join(root, 'queue'), { recursive: true }); + fs.mkdirSync(path.join(root, 'spool'), { recursive: true }); + fs.mkdirSync(path.join(root, 'receipts'), { recursive: true }); const result = runJsonEval(root, ` - import { runWatchdogAdapter } from '@openclaw/plugin-reporting-governance/adapters/watchdog'; - const out = runWatchdogAdapter({ + import { runOrchestratorAdapter } from '@openclaw/plugin-reporting-governance/adapters/orchestrator'; + const payload = runOrchestratorAdapter({ + runtimeBinding: { + cwd: ${JSON.stringify(path.resolve(packageRoot, '..', '..'))}, + scripts: { + orchestrator: ${JSON.stringify(path.resolve(packageRoot, '..', '..', 'scripts', 'watchdog_auto_notify_orchestrator.mjs'))}, + watchdog: ${JSON.stringify(path.resolve(packageRoot, '..', '..', 'scripts', 'long_task_watchdog.mjs'))}, + dispatcher: ${JSON.stringify(path.resolve(packageRoot, '..', '..', 'scripts', 'operator_notify_dispatcher.mjs'))}, + bridgeSupervisor: ${JSON.stringify(path.resolve(packageRoot, '..', '..', 'scripts', 'operator_notify_bridge_supervisor.mjs'))}, + }, + }, state: ${JSON.stringify(statePath)}, evidenceDir: ${JSON.stringify(path.join(root, 'evidence'))}, eventDir: ${JSON.stringify(path.join(root, 'events'))}, - notificationDir: ${JSON.stringify(path.join(root, 'queue'))}, - runtimeBinding: { - cwd: ${JSON.stringify(root)}, - scripts: { - watchdog: ${JSON.stringify(stubScriptPath)}, - }, - }, + queueDir: ${JSON.stringify(path.join(root, 'queue'))}, + spoolDir: ${JSON.stringify(path.join(root, 'spool'))}, + receiptDir: ${JSON.stringify(path.join(root, 'receipts'))}, + writeState: true, + dryRun: true, now: '2026-05-07T08:20:00.000Z', }); - process.stdout.write(JSON.stringify(out)); - `); + process.stdout.write(JSON.stringify({ + ok: payload.ok, + dispatchedCount: payload.result.dispatcher.dispatchedCount, + pendingCount: payload.result.supervisor.pendingCount, + })); + `, { + RG_REPO_ROOT: path.resolve(packageRoot, '..', '..'), + }); assert.equal(result.ok, true); - assert.equal(result.source, 'stub-watchdog'); - assert.ok(result.argv.includes('--state')); - assert.ok(result.argv.includes(path.resolve(statePath))); + assert.equal(result.dispatchedCount, 1); + assert.equal(result.pendingCount, 1); } finally { fs.rmSync(root, { recursive: true, force: true }); } diff --git a/plugins/reporting-governance/test/runtime-integrated.integration.test.mjs b/plugins/reporting-governance/test/runtime-integrated.integration.test.mjs new file mode 100644 index 0000000..c296e48 --- /dev/null +++ b/plugins/reporting-governance/test/runtime-integrated.integration.test.mjs @@ -0,0 +1,207 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import path from 'node:path'; +import os from 'node:os'; + +import { executeRuntimeIntegratedGovernance } from '../src/index.mjs'; +import capabilityDescriptor from '../capabilities/openclaw-watchdog-reference.json' with { type: 'json' }; + +const packageRoot = path.resolve(import.meta.dirname, '..'); +const repoRoot = path.resolve(packageRoot, '..', '..'); + +const noSilencePack = { + metadata: { id: 'no-silence', severity_default: 'high' }, + spec: { + evaluation_mode: 'any_rule_match', + rules: [ + { + id: 'no-silence.missed-checkpoint', + title: 'Missed checkpoint requires visible recovery', + triggers: { event_types: ['silence_timeout'] }, + conditions: { + all: [ + { fact: 'checkpoint.is_overdue', equals: true } + ] + }, + decision_output: { + decision: 'force_checkpoint', + severity: 'high', + reason: 'checkpoint overdue triggered forced operator-visible recovery', + required_actions: [ + { action: 'notify_operator', target: 'operator_channel', mandatory: true }, + { action: 'emit_event', target: 'event_stream', mandatory: true } + ], + operator_notice: { + required: true, + channel: 'telegram', + urgency: 'high', + message: 'Required update: checkpoint overdue.', + deadline: '2026-01-01T00:00:00.000Z' + } + } + } + ] + } +}; + +const strictProfileArtifact = { + kind: 'DeploymentProfileArtifact', + apiVersion: 'reporting-governance/v1alpha1', + metadata: { + id: 'strict-manager-mode', + runtime: 'openclaw', + compatibility_mode: 'strict_envelope', + }, + spec: { + package: { pluginVersion: '0.1.0-mainline' }, + policies: { + overrides: { + checkpoints: { overdueAction: 'force_checkpoint' } + } + }, + notifications: { + operatorVisibleRecoveryRequired: true + }, + bindings: { + runtime: 'openclaw', + entrypoint: 'scripts/watchdog_auto_notify_orchestrator.mjs', + scripts: { + watchdog: 'scripts/long_task_watchdog.mjs', + dispatcher: 'scripts/operator_notify_dispatcher.mjs', + bridgeSupervisor: 'scripts/operator_notify_bridge_supervisor.mjs', + senderBinding: 'scripts/operator_notify_sender_binding.mjs', + orchestrator: 'scripts/watchdog_auto_notify_orchestrator.mjs' + }, + artifact_roots: { + queueItems: 'state/operator-notify-queue' + } + } + }, + capability_expectations: { + required: [ + 'emit_canonical_events', + 'evaluate_watchdog_overdue', + 'create_queue_items', + 'create_spool_handoff', + 'write_bridge_receipts' + ], + preferred: ['direct_sender_binding', 'final_delivery_ack'] + } +}; + +function createFixtureRoot() { + return fs.mkdtempSync(path.join(os.tmpdir(), 'reporting-governance-runtime-integrated-')); +} + +function mkdirs(root, names) { + for (const name of names) { + fs.mkdirSync(path.join(root, name), { recursive: true }); + } +} + +function writeState(root) { + const statePath = path.join(root, 'watchdog-state.json'); + fs.writeFileSync(statePath, `${JSON.stringify({ + version: 1, + watchdogs: [ + { + id: 'reporting-governance-plugin-watchdog', + task: 'reporting-governance plugin spec development', + status: 'active', + ownerSessionKey: 'agent:coder:main', + reportChannel: 'telegram', + reportTarget: '864811879', + intervalMinutes: 10, + lastMilestoneAt: '2026-05-07T08:00:00.000Z', + lastAlertAt: null, + }, + ], + }, null, 2)}\n`, 'utf8'); + return statePath; +} + +function readSingleJson(dirPath) { + const files = fs.readdirSync(dirPath).filter((name) => name.endsWith('.json')).sort(); + assert.equal(files.length, 1, `expected exactly one json file in ${dirPath}`); + return JSON.parse(fs.readFileSync(path.join(dirPath, files[0]), 'utf8')); +} + +test('runtime-integrated path wires executeGovernanceContract deployment binding into orchestrator execution', () => { + const root = createFixtureRoot(); + try { + mkdirs(root, ['evidence', 'events', 'queue', 'spool', 'receipts']); + const statePath = writeState(root); + + const result = executeRuntimeIntegratedGovernance({ + event: { + type: 'silence_timeout', + payload: { + checkpoint_overdue: true, + } + }, + evidence: [ + { id: 'ev-watchdog', quality: 'moderate', is_new: true } + ], + capabilityDescriptor, + policyPacks: [noSilencePack], + context: { + signals: ['checkpoint_overdue'], + }, + profile: strictProfileArtifact, + packageVersion: '0.1.0-mainline', + repoRootOverride: repoRoot, + runtime: { + state: statePath, + evidenceDir: path.join(root, 'evidence'), + eventDir: path.join(root, 'events'), + queueDir: path.join(root, 'queue'), + spoolDir: path.join(root, 'spool'), + receiptDir: path.join(root, 'receipts'), + writeState: true, + dryRun: true, + now: '2026-05-07T08:20:00.000Z', + }, + }); + + assert.equal(result.preflight.status, 'pass'); + assert.equal(result.contract.decision, 'force_checkpoint'); + assert.equal(result.runtimeIntegration.attempted, true); + assert.equal(result.runtimeIntegration.adapter, 'orchestrator'); + assert.equal(result.runtimeExecution.ok, true); + assert.equal(result.runtimeExecution.result.dispatcher.dispatchedCount, 1); + assert.equal(result.runtimeExecution.result.supervisor.pendingCount, 1); + + const queueItem = readSingleJson(path.join(root, 'queue')); + assert.equal(queueItem.status, 'dispatched'); + + const receipt = readSingleJson(path.join(root, 'receipts')); + assert.equal(receipt.state, 'pending_external_send'); + assert.equal(receipt.supervisor_mode, 'dry_run'); + } finally { + fs.rmSync(root, { recursive: true, force: true }); + } +}); + +test('runtime-integrated path stays planning-only when no runtime payload is supplied', () => { + const result = executeRuntimeIntegratedGovernance({ + event: { + type: 'silence_timeout', + payload: { + checkpoint_overdue: true, + } + }, + capabilityDescriptor, + policyPacks: [noSilencePack], + context: { + signals: ['checkpoint_overdue'], + }, + profile: strictProfileArtifact, + packageVersion: '0.1.0-mainline', + repoRootOverride: repoRoot, + }); + + assert.equal(result.preflight.status, 'pass'); + assert.equal(result.runtimeIntegration.attempted, false); + assert.equal(result.runtimeExecution, null); +});