feat: add proactive report gate lock evaluator
This commit is contained in:
201
scripts/proactive_report_gate_lock.mjs
Normal file
201
scripts/proactive_report_gate_lock.mjs
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
import fs from 'node:fs';
import { pathToFileURL } from 'node:url';
|
||||||
|
|
||||||
|
// Values accepted for `fallbackState` when evaluating the gate.
const LEGAL_FALLBACK_STATES = new Set([
  'paused',
  'blocked',
  'waiting_user',
  'pending_verification',
]);

// Values accepted for `reportMode` when one is supplied.
const LEGAL_REPORT_MODES = new Set([
  'checkpoint_only',
  'watchdog',
  'button_path',
  'direct_update',
]);
|
||||||
|
|
||||||
|
/**
 * Report a fatal error and terminate the process.
 *
 * Writes a single `CODE: message` line to stderr, then exits with status 1.
 *
 * @param {string} code - Short error code placed before the colon.
 * @param {string} message - Human-readable description of the failure.
 */
function fail(code, message) {
  const line = `${code}: ${message}\n`;
  process.stderr.write(line);
  process.exit(1);
}
|
||||||
|
|
||||||
|
/**
 * Parse the command-line arguments of the gate evaluator.
 *
 * Recognised flags:
 *   --input <value>  stored as `input` (the value may not be empty or start with `--`)
 *   --compact        sets `pretty` to false
 * Anything else is reported through `fail` as a CLI_ERROR.
 *
 * @param {string[]} argv - Full argument vector; scanning starts at index 2.
 * @returns {{ input: string, pretty: boolean }} Parsed options (defaults: '' and true).
 */
function parseArgs(argv) {
  const parsed = { input: '', pretty: true };
  let index = 2;
  while (index < argv.length) {
    const token = argv[index];
    switch (token) {
      case '--input': {
        const next = argv[index + 1];
        const usable = Boolean(next) && !next.startsWith('--');
        if (!usable) fail('CLI_ERROR', '--input requires a value');
        parsed.input = next;
        // Step over the value that was just consumed.
        index += 1;
        break;
      }
      case '--compact':
        parsed.pretty = false;
        break;
      default:
        fail('CLI_ERROR', `unknown argument: ${token}`);
    }
    index += 1;
  }
  return parsed;
}
|
||||||
|
|
||||||
|
/**
 * Read the raw input text for the evaluator.
 *
 * An empty path or `-` selects standard input (file descriptor 0); any other
 * value is treated as a file path. Read failures (missing file, permission
 * problems, unreadable stdin) are reported through `fail` as `INPUT_ERROR`
 * so they follow the same `CODE: message` convention as the other CLI errors
 * instead of surfacing as an uncaught exception.
 *
 * @param {string} inputPath - File path, `-`, or an empty string.
 * @returns {string} The UTF-8 decoded input.
 */
function readInput(inputPath) {
  const fromStdin = !inputPath || inputPath === '-';
  try {
    return fs.readFileSync(fromStdin ? 0 : inputPath, 'utf8');
  } catch (error) {
    const label = fromStdin ? 'stdin' : inputPath;
    fail('INPUT_ERROR', `cannot read input from ${label}: ${error.message}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode the raw input as JSON.
 *
 * Any parse failure is reported through `fail` with the INVALID_JSON code.
 *
 * @param {string} raw - Text to decode.
 * @returns {*} The decoded JSON value.
 */
function parseJson(raw) {
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    fail('INVALID_JSON', 'input must be valid JSON');
  }
  return decoded;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a value is a string that still has content after trimming.
 *
 * @param {*} value - Candidate value of any type.
 * @returns {boolean} True only for strings containing a non-whitespace character.
 */
function hasNonEmptyString(value) {
  if (typeof value !== 'string') return false;
  return value.trim().length > 0;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the input is classified as a long task.
 *
 * @param {*} input - Decoded input; null and undefined are tolerated.
 * @returns {boolean} True when `input.classification` is exactly 'long_task'.
 */
function isLongTask(input) {
  const classification = input == null ? undefined : input.classification;
  return classification === 'long_task';
}
|
||||||
|
|
||||||
|
/**
 * Decide whether the input describes a silent-progression candidate.
 *
 * Precedence: a boolean `silentContinuation` wins, then a boolean
 * `silentCandidate`; otherwise the result is true when `needsWaiting` or
 * `needsSubagent` is strictly `true`.
 *
 * @param {*} input - Decoded input; null and undefined are tolerated.
 * @returns {boolean} Whether the input counts as a silent-progression candidate.
 */
function isSilentProgressionCandidate(input) {
  const source = input ?? {};
  // Explicit boolean flags override the inferred signals, in this order.
  for (const key of ['silentContinuation', 'silentCandidate']) {
    const flag = source[key];
    if (typeof flag === 'boolean') return flag;
  }
  return source.needsWaiting === true || source.needsSubagent === true;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the input carries checkpoint-style evidence.
 *
 * @param {*} input - Decoded input; null and undefined are tolerated.
 * @returns {boolean} True when `externalizedCheckpointPath` or
 *   `checkpointTrigger` is a non-empty string.
 */
function hasCheckpointOnlyEvidence(input) {
  const candidates = [input?.externalizedCheckpointPath, input?.checkpointTrigger];
  return candidates.some((candidate) => hasNonEmptyString(candidate));
}
|
||||||
|
|
||||||
|
/**
 * Check a fallback state against LEGAL_FALLBACK_STATES.
 *
 * @param {*} value - Candidate fallback state.
 * @returns {boolean} True when `value` is a non-empty string whose trimmed
 *   form is a member of LEGAL_FALLBACK_STATES.
 */
function isLegalFallbackState(value) {
  if (!hasNonEmptyString(value)) return false;
  const normalized = value.trim();
  return LEGAL_FALLBACK_STATES.has(normalized);
}
|
||||||
|
|
||||||
|
/**
 * Check a report mode against LEGAL_REPORT_MODES.
 *
 * @param {*} value - Candidate report mode.
 * @returns {boolean} True when `value` is a non-empty string whose trimmed
 *   form is a member of LEGAL_REPORT_MODES.
 */
function isLegalReportMode(value) {
  if (!hasNonEmptyString(value)) return false;
  const normalized = value.trim();
  return LEGAL_REPORT_MODES.has(normalized);
}
|
||||||
|
|
||||||
|
/**
 * Append one evidence requirement to the given list (mutates the list).
 *
 * @param {Array<object>} requiredEvidence - List that receives the new entry.
 * @param {string} evidenceKey - Identifier of the missing evidence.
 * @param {string[]} acceptedFields - Input fields that can supply the evidence.
 * @param {string} requiredValue - Description of the acceptable value.
 */
function addRequiredEvidence(requiredEvidence, evidenceKey, acceptedFields, requiredValue) {
  const entry = { evidenceKey, acceptedFields, requiredValue };
  requiredEvidence.push(entry);
}
|
||||||
|
|
||||||
|
/**
 * Evaluate the proactive-report gate for one decoded input.
 *
 * Outcomes:
 *   - not a long task, or not a silent-progression candidate: the gate does
 *     not apply (`gateRequired: false`, `gateStatus: 'not_applicable'`).
 *   - any required evidence is missing or invalid: `gateStatus: 'fail'` with
 *     the collected `reasons` and `requiredEvidence` entries.
 *   - otherwise: `gateStatus: 'pass'` and `reportBindingStatus: 'bound'`.
 *
 * Evidence checked: `firstReportTrigger` and `nextReportCondition` (non-empty
 * strings), `fallbackState` (legal fallback state), optional `reportMode`
 * (legal report mode when supplied), and `handoffMode === 'button_path'`
 * whenever `needsOwnerDecision` is strictly true.
 *
 * @param {*} input - Decoded JSON input; null and non-object values are tolerated.
 * @returns {{
 *   gate: string,
 *   gateRequired: boolean,
 *   gateStatus: string,
 *   ok: boolean,
 *   reasons: string[],
 *   requiredEvidence: Array<object>,
 *   allowedResponseModes: string[],
 *   reportBindingStatus: string,
 * }} The gate verdict.
 */
function evaluateGate(input) {
  // Every verdict starts with the gate name; the remaining keys keep the
  // order in which the caller lists them, so serialized output is stable.
  const verdict = (fields) => ({ gate: 'proactive_report_gate_lock', ...fields });

  if (!isLongTask(input)) {
    return verdict({
      gateRequired: false,
      gateStatus: 'not_applicable',
      ok: true,
      reasons: ['classification is not long_task'],
      requiredEvidence: [],
      allowedResponseModes: ['direct_reply'],
      reportBindingStatus: 'not_applicable',
    });
  }

  if (!isSilentProgressionCandidate(input)) {
    return verdict({
      gateRequired: false,
      gateStatus: 'not_applicable',
      ok: true,
      reasons: ['not a silent progression candidate'],
      requiredEvidence: [],
      allowedResponseModes: ['direct_reply', 'non_silent_follow_up'],
      reportBindingStatus: 'not_required',
    });
  }

  const reasons = [];
  const requiredEvidence = [];
  const allowedResponseModes = [];

  const firstReportTriggerOk = hasNonEmptyString(input?.firstReportTrigger);
  const nextReportConditionOk = hasNonEmptyString(input?.nextReportCondition);
  const fallbackStateOk = isLegalFallbackState(input?.fallbackState);

  // reportMode is optional: null, undefined, or a blank string means "not
  // supplied". Any other value, including non-strings such as numbers or
  // arrays, must be a legal mode; a malformed value must not pass as absent.
  const reportMode = input?.reportMode;
  const reportModeAbsent =
    reportMode == null || (typeof reportMode === 'string' && reportMode.trim().length === 0);
  const reportModeOk = reportModeAbsent || isLegalReportMode(reportMode);

  const needsOwnerDecision = input?.needsOwnerDecision === true;
  const hasButtonPathHandoff = input?.handoffMode === 'button_path';

  if (!firstReportTriggerOk) {
    reasons.push('missing first proactive report trigger');
    addRequiredEvidence(requiredEvidence, 'firstReportTrigger', ['firstReportTrigger'], 'non-empty string');
  }

  if (!nextReportConditionOk) {
    reasons.push('missing next proactive report condition');
    addRequiredEvidence(requiredEvidence, 'nextReportCondition', ['nextReportCondition'], 'non-empty string');
  }

  if (!fallbackStateOk) {
    reasons.push('missing fallback state for stalled reporting');
    addRequiredEvidence(
      requiredEvidence,
      'fallbackState',
      ['fallbackState'],
      'one of paused | blocked | waiting_user | pending_verification',
    );
  }

  if (!reportModeOk) {
    reasons.push('invalid proactive report mode');
    addRequiredEvidence(
      requiredEvidence,
      'reportMode',
      ['reportMode'],
      'one of checkpoint_only | watchdog | button_path | direct_update',
    );
  }

  // Checkpoint fields alone never stand in for the core report binding; add
  // an explanatory reason when they are present while the binding is incomplete.
  const missingCoreBinding = !firstReportTriggerOk || !nextReportConditionOk || !fallbackStateOk;
  if (missingCoreBinding && hasCheckpointOnlyEvidence(input)) {
    reasons.push('checkpoint path alone does not satisfy proactive report binding');
  }

  if (needsOwnerDecision && !hasButtonPathHandoff) {
    reasons.push('owner decision flow must preserve button-path handoff');
    addRequiredEvidence(requiredEvidence, 'handoffMode', ['handoffMode'], 'button_path');
  }

  if (requiredEvidence.length > 0) {
    allowedResponseModes.push('non_silent_follow_up');
    if (needsOwnerDecision) allowedResponseModes.push('button_path');

    return verdict({
      gateRequired: true,
      gateStatus: 'fail',
      ok: false,
      reasons,
      requiredEvidence,
      allowedResponseModes,
      reportBindingStatus: 'missing',
    });
  }

  if (needsOwnerDecision && hasButtonPathHandoff) {
    reasons.push('owner decision flow preserves button-path handoff');
    allowedResponseModes.push('button_path');
  } else {
    reasons.push('proactive report binding is complete for silent progression');
    allowedResponseModes.push('silent_continuation');
    allowedResponseModes.push('direct_reply');
  }

  return verdict({
    gateRequired: true,
    gateStatus: 'pass',
    ok: true,
    reasons,
    requiredEvidence: [],
    allowedResponseModes,
    reportBindingStatus: 'bound',
  });
}
|
||||||
|
|
||||||
|
/**
 * CLI entry point: parse arguments, read and decode the input, evaluate the
 * gate, and print the verdict as one JSON document on stdout (indented by two
 * spaces unless `--compact` was given).
 */
function main() {
  const { input: inputPath, pretty } = parseArgs(process.argv);
  const verdict = evaluateGate(parseJson(readInput(inputPath)));
  const indent = pretty ? 2 : 0;
  process.stdout.write(`${JSON.stringify(verdict, null, indent)}\n`);
}
|
||||||
|
|
||||||
|
// Run the CLI only when this file is the process entry point, not when it is
// imported for `evaluateGate`. pathToFileURL builds the URL the same way the
// module loader does, so script paths containing characters such as '#', '?'
// or '%' still compare equal; a hand-built `file://` string would treat them
// as URL syntax. The typeof guard covers launches where argv[1] is undefined.
const isMain =
  typeof process.argv[1] === 'string' && import.meta.url === pathToFileURL(process.argv[1]).href;
if (isMain) {
  main();
}
|
||||||
|
|
||||||
|
export { evaluateGate };
|
||||||
@@ -28,23 +28,110 @@ const scenarios = [
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'missing next report condition',
|
name: 'missing next report condition',
|
||||||
input: {},
|
input: {
|
||||||
expected: {},
|
classification: 'long_task',
|
||||||
|
silentCandidate: true,
|
||||||
|
firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
|
||||||
|
fallbackState: 'paused',
|
||||||
|
reportMode: 'watchdog',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: true,
|
||||||
|
gateStatus: 'fail',
|
||||||
|
reasonIncludes: 'missing next proactive report condition',
|
||||||
|
requiredEvidenceKey: 'nextReportCondition',
|
||||||
|
allowedResponseModesIncludes: 'non_silent_follow_up',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'missing fallback state',
|
name: 'missing fallback state',
|
||||||
input: {},
|
input: {
|
||||||
expected: {},
|
classification: 'long_task',
|
||||||
|
silentCandidate: true,
|
||||||
|
firstReportTrigger: 'when delegated scan returns',
|
||||||
|
nextReportCondition: 'report again after verifier output or blocker-state change',
|
||||||
|
reportMode: 'watchdog',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: true,
|
||||||
|
gateStatus: 'fail',
|
||||||
|
reasonIncludes: 'missing fallback state for stalled reporting',
|
||||||
|
requiredEvidenceKey: 'fallbackState',
|
||||||
|
allowedResponseModesIncludes: 'non_silent_follow_up',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'checkpoint-only spoof is insufficient',
|
||||||
|
input: {
|
||||||
|
classification: 'long_task',
|
||||||
|
silentCandidate: true,
|
||||||
|
externalizedCheckpointPath: 'checkpoints/task-123.json',
|
||||||
|
checkpointTrigger: 'when subagent returns',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: true,
|
||||||
|
gateStatus: 'fail',
|
||||||
|
reasonIncludes: 'checkpoint path alone does not satisfy proactive report binding',
|
||||||
|
requiredEvidenceKey: 'firstReportTrigger',
|
||||||
|
allowedResponseModesIncludes: 'non_silent_follow_up',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'valid proactive report binding',
|
name: 'valid proactive report binding',
|
||||||
input: {},
|
input: {
|
||||||
expected: {},
|
classification: 'long_task',
|
||||||
|
silentCandidate: true,
|
||||||
|
firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
|
||||||
|
nextReportCondition: 'report again only after new verifier output or blocker-state change',
|
||||||
|
fallbackState: 'blocked',
|
||||||
|
reportMode: 'watchdog',
|
||||||
|
ownerVisibleIfStalled: true,
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: true,
|
||||||
|
gateStatus: 'pass',
|
||||||
|
reasonIncludes: 'proactive report binding is complete for silent progression',
|
||||||
|
allowedResponseModesIncludes: 'silent_continuation',
|
||||||
|
requiredEvidenceLength: 0,
|
||||||
|
reportBindingStatus: 'bound',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: 'non-silent long-task is not gated',
|
||||||
|
input: {
|
||||||
|
classification: 'long_task',
|
||||||
|
silentCandidate: false,
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: false,
|
||||||
|
gateStatus: 'not_applicable',
|
||||||
|
reasonIncludes: 'not a silent progression candidate',
|
||||||
|
allowedResponseModesIncludes: 'direct_reply',
|
||||||
|
requiredEvidenceLength: 0,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: 'owner decision + button-path handoff',
|
name: 'owner decision + button-path handoff',
|
||||||
input: {},
|
input: {
|
||||||
expected: {},
|
classification: 'long_task',
|
||||||
|
silentCandidate: true,
|
||||||
|
firstReportTrigger: 'when delegated scan returns or at 10 minutes, whichever comes first',
|
||||||
|
nextReportCondition: 'report again only after new verifier output or blocker-state change',
|
||||||
|
fallbackState: 'waiting_user',
|
||||||
|
reportMode: 'button_path',
|
||||||
|
ownerVisibleIfStalled: true,
|
||||||
|
needsOwnerDecision: true,
|
||||||
|
handoffMode: 'button_path',
|
||||||
|
},
|
||||||
|
expected: {
|
||||||
|
gateRequired: true,
|
||||||
|
gateStatus: 'pass',
|
||||||
|
reasonIncludes: 'owner decision flow preserves button-path handoff',
|
||||||
|
allowedResponseModesIncludes: 'button_path',
|
||||||
|
disallowedResponseMode: 'plain_text_closure',
|
||||||
|
requiredEvidenceLength: 0,
|
||||||
|
reportBindingStatus: 'bound',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|
||||||
@@ -99,6 +186,13 @@ function assertScenario(output, expected) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (expected.disallowedResponseMode) {
|
||||||
|
assert.ok(
|
||||||
|
!output.allowedResponseModes.includes(expected.disallowedResponseMode),
|
||||||
|
`expected allowedResponseModes to exclude: ${expected.disallowedResponseMode}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
if (typeof expected.requiredEvidenceLength === 'number') {
|
if (typeof expected.requiredEvidenceLength === 'number') {
|
||||||
assert.equal(
|
assert.equal(
|
||||||
output.requiredEvidence.length,
|
output.requiredEvidence.length,
|
||||||
@@ -113,6 +207,10 @@ function assertScenario(output, expected) {
|
|||||||
`expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`,
|
`expected requiredEvidence to include key: ${expected.requiredEvidenceKey}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (expected.reportBindingStatus) {
|
||||||
|
assert.equal(output.reportBindingStatus, expected.reportBindingStatus, 'reportBindingStatus mismatch');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const results = [];
|
const results = [];
|
||||||
@@ -132,6 +230,7 @@ for (const scenario of scenarios) {
|
|||||||
reasons: output.reasons,
|
reasons: output.reasons,
|
||||||
requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry.evidenceKey),
|
requiredEvidenceKeys: output.requiredEvidence.map((entry) => entry.evidenceKey),
|
||||||
allowedResponseModes: output.allowedResponseModes,
|
allowedResponseModes: output.allowedResponseModes,
|
||||||
|
reportBindingStatus: output.reportBindingStatus,
|
||||||
assertion: 'pass',
|
assertion: 'pass',
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
Reference in New Issue
Block a user