286 lines
8.7 KiB
JavaScript
286 lines
8.7 KiB
JavaScript
#!/usr/bin/env node
|
|
import fs from 'fs';
|
|
|
|
/**
 * Input field paths accepted as evidence for each gate concern.
 *
 * Keys name the concern; values list the paths that are probed on the gate
 * input. A dotted entry (for example `handoff.mode`) addresses a nested
 * property. The outer object and every path list are frozen.
 */
const EVIDENCE_FIELDS = Object.freeze(
  Object.fromEntries(
    Object.entries({
      externalizedCheckpoint: [
        'externalizedCheckpointPath',
        'externalizedTrigger',
        'checkpointPath',
      ],
      concreteNextAction: [
        'nextStep',
        'requiredNextAction',
        'concreteNextAction',
      ],
      buttonPathMode: [
        'handoffMode',
        'handoff.mode',
        'replyClosureMode',
      ],
      progressionClaim: [
        'progressionClaim',
        'claimedProgression',
        'statusSummary',
      ],
      executionEvidence: [
        'executionEvidence',
        'toolCallEvidence',
        'dispatchEvidence',
        'fileChangeEvidence',
        'verificationEvidence',
        'checkpointArtifactEvidence',
      ],
      progressEvidence: [
        'progressEvidence',
        'progressEvidence.sessionKey',
        'progressEvidence.runId',
        'progressEvidence.modified_files',
        'progressEvidence.verificationResult',
        'sessionKey',
        'runId',
        'modified_files',
        'verificationResult',
      ],
    }).map(([evidenceKey, fieldPaths]) => [evidenceKey, Object.freeze(fieldPaths)]),
  ),
);
|
|
|
|
/**
 * Frozen descriptors of the evidence each gate rule asks for.
 *
 * Every descriptor carries its own key (`evidenceKey`), the matching path list
 * from `EVIDENCE_FIELDS` (`acceptedFields`, shared by reference) and a
 * human-readable description of the value that satisfies it (`requiredValue`).
 *
 * NOTE(review): within the visible code, `evaluateGate` never reports the
 * `executionEvidence` descriptor — confirm whether that is intentional.
 */
const GATE_REQUIREMENTS = Object.freeze(
  Object.fromEntries(
    [
      ['externalizedCheckpoint', 'non-empty string'],
      ['concreteNextAction', 'non-empty string'],
      ['buttonPathMode', 'button_path'],
      [
        'executionEvidence',
        'tool call, dispatch, file change, verification output, or checkpoint artifact evidence',
      ],
      [
        'progressEvidence',
        'sessionKey, runId, modified_files, verification result, or equivalent concrete progress evidence',
      ],
    ].map(([evidenceKey, requiredValue]) => [
      evidenceKey,
      Object.freeze({
        evidenceKey,
        acceptedFields: EVIDENCE_FIELDS[evidenceKey],
        requiredValue,
      }),
    ]),
  ),
);
|
|
|
|
/**
 * Reports a fatal error and terminates the process.
 *
 * Writes `<code>: <message>` followed by a newline to stderr, then exits with
 * status 1.
 *
 * @param {string} code - Short error category (e.g. `CLI_ERROR`).
 * @param {string} message - Human-readable description of the failure.
 */
function fail(code, message) {
  const line = `${code}: ${message}\n`;
  process.stderr.write(line);
  process.exit(1);
}
|
|
|
|
/**
 * Parses the command-line flags understood by this script.
 *
 * Recognised flags:
 *   --input <path>  input location (a later occurrence overrides an earlier one)
 *   --compact       turn off pretty-printed output
 * Any other argument, or `--input` without a usable value, is reported through
 * `fail` with code `CLI_ERROR`.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {{input: string, pretty: boolean}} Parsed options; `input` defaults
 *   to `''` and `pretty` to `true`.
 */
function parseArgs(argv) {
  const parsed = { input: '', pretty: true };
  let index = 2;

  while (index < argv.length) {
    const flag = argv[index];

    switch (flag) {
      case '--input': {
        const next = argv[index + 1];
        // A following token that looks like another flag is not a value.
        const missing = !next || next.startsWith('--');
        if (missing) fail('CLI_ERROR', '--input requires a value');
        parsed.input = next;
        index += 1; // skip the consumed value
        break;
      }
      case '--compact':
        parsed.pretty = false;
        break;
      default:
        fail('CLI_ERROR', `unknown argument: ${flag}`);
    }

    index += 1;
  }

  return parsed;
}
|
|
|
|
/**
 * Reads the raw input text synchronously as UTF-8.
 *
 * @param {string} path - File to read. An empty/falsy value or `-` selects
 *   file descriptor 0 (standard input) instead.
 * @returns {string} The complete contents of the selected source.
 */
function readInput(path) {
  const useStdin = !path || path === '-';
  const source = useStdin ? 0 : path;
  return fs.readFileSync(source, 'utf8');
}
|
|
|
|
/**
 * Parses the raw input text as JSON.
 *
 * A parse failure is reported through `fail` with code `INVALID_JSON`; the
 * underlying parser error is not included in the message.
 *
 * @param {string} raw - JSON text.
 * @returns {*} The parsed value.
 */
function parseJson(raw) {
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    fail('INVALID_JSON', 'input must be valid JSON');
  }
  return parsed;
}
|
|
|
|
/**
 * Tells whether the input is classified as a long task.
 *
 * @param {object} input - Gate input.
 * @returns {boolean} `true` only when `input.classification` is exactly the
 *   string `long_task`.
 */
function isLongTask(input) {
  const classification = input.classification;
  return classification === 'long_task';
}
|
|
|
|
/**
 * Checks that a value is a string containing something other than whitespace.
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} `true` for a string whose trimmed form is non-empty;
 *   `false` for every other value.
 */
function hasNonEmptyString(value) {
  if (typeof value !== 'string') return false;
  return value.trim() !== '';
}
|
|
|
|
/**
 * Resolves a dot-separated property path against a value.
 *
 * @param {*} input - Root value to start from.
 * @param {string} path - Property names joined with `.` (e.g. `handoff.mode`).
 * @returns {*} The value found at the end of the path, or `undefined` as soon
 *   as a step has to be taken from `null`/`undefined`.
 */
function getPathValue(input, path) {
  let current = input;
  for (const key of path.split('.')) {
    // Nothing can be read from null/undefined; the lookup is a miss.
    if (current === null || current === undefined) return undefined;
    current = current[key];
  }
  return current;
}
|
|
|
|
/**
 * Checks whether at least one of the given paths holds a non-blank string.
 *
 * @param {*} input - Value the paths are resolved against.
 * @param {readonly string[]} fieldPaths - Dot-separated paths to probe, in order.
 * @returns {boolean} `true` on the first path whose value is a non-blank
 *   string; `false` when no path qualifies.
 */
function hasAnyNonEmptyString(input, fieldPaths) {
  for (const fieldPath of fieldPaths) {
    const candidate = getPathValue(input, fieldPath);
    if (hasNonEmptyString(candidate)) return true;
  }
  return false;
}
|
|
|
|
/**
 * Checks whether at least one of the given paths holds exactly the accepted value.
 *
 * @param {*} input - Value the paths are resolved against.
 * @param {readonly string[]} fieldPaths - Dot-separated paths to probe, in order.
 * @param {*} acceptedValue - Value compared with strict equality (`===`).
 * @returns {boolean} `true` when any path resolves to `acceptedValue`.
 */
function hasAcceptedValue(input, fieldPaths, acceptedValue) {
  for (const fieldPath of fieldPaths) {
    if (getPathValue(input, fieldPath) === acceptedValue) return true;
  }
  return false;
}
|
|
|
|
/**
 * Produces a plain, mutable description of a requirement descriptor.
 *
 * @param {{evidenceKey: string, acceptedFields: Iterable<string>, requiredValue: string}} requirement
 *   Descriptor to describe.
 * @returns {{evidenceKey: string, acceptedFields: string[], requiredValue: string}}
 *   New object whose `acceptedFields` is a fresh array copy, so the result does
 *   not share the descriptor's list.
 */
function describeRequirement(requirement) {
  const { evidenceKey, acceptedFields, requiredValue } = requirement;
  return {
    evidenceKey,
    acceptedFields: [...acceptedFields],
    requiredValue,
  };
}
|
|
|
|
/**
 * Checks whether the input names an externalized checkpoint.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when any path listed under
 *   `EVIDENCE_FIELDS.externalizedCheckpoint` holds a non-blank string.
 */
function hasExternalizedCheckpointPath(input) {
  const { externalizedCheckpoint: checkpointFields } = EVIDENCE_FIELDS;
  return hasAnyNonEmptyString(input, checkpointFields);
}
|
|
|
|
/**
 * Checks whether the input states a concrete next action.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when any path listed under
 *   `EVIDENCE_FIELDS.concreteNextAction` holds a non-blank string.
 */
function hasConcreteNextAction(input) {
  const { concreteNextAction: nextActionFields } = EVIDENCE_FIELDS;
  return hasAnyNonEmptyString(input, nextActionFields);
}
|
|
|
|
/**
 * Decides whether the input asks to continue silently.
 *
 * Precedence: a boolean `silentContinuation` wins outright; otherwise a
 * boolean `silentCandidate` wins; otherwise the answer is `true` when
 * `needsWaiting` or `needsSubagent` is exactly `true`.
 *
 * @param {object} input - Gate input.
 * @returns {boolean} Whether silent continuation is requested.
 */
function wantsSilentContinuation(input) {
  // An explicit boolean flag is authoritative, even when it is `false`.
  for (const flag of ['silentContinuation', 'silentCandidate']) {
    const explicit = input[flag];
    if (typeof explicit === 'boolean') return explicit;
  }
  return input.needsWaiting === true || input.needsSubagent === true;
}
|
|
|
|
/**
 * Decides whether the input claims that execution is under way.
 *
 * Precedence: a boolean `claimedExecution` wins; otherwise a boolean
 * `executionClaimed` wins; otherwise the claim is inferred from `status`
 * being exactly `active`.
 *
 * @param {object} input - Gate input.
 * @returns {boolean} Whether execution is claimed.
 */
function claimsExecution(input) {
  const explicitClaim = [input.claimedExecution, input.executionClaimed]
    .find((flag) => typeof flag === 'boolean');
  if (explicitClaim !== undefined) return explicitClaim;
  return input.status === 'active';
}
|
|
|
|
/**
 * Tells whether the input requires a decision from the owner.
 *
 * @param {object} input - Gate input.
 * @returns {boolean} `true` only when `input.needsOwnerDecision` is the
 *   boolean `true`; any other value (including truthy non-booleans) is `false`.
 */
function needsOwnerDecision(input) {
  return input.needsOwnerDecision === true;
}
|
|
|
|
/**
 * Tells whether the input ends in the button-path response mode.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when any path listed under
 *   `EVIDENCE_FIELDS.buttonPathMode` is exactly the string `button_path`.
 */
function usesButtonPath(input) {
  const { buttonPathMode: modeFields } = EVIDENCE_FIELDS;
  return hasAcceptedValue(input, modeFields, 'button_path');
}
|
|
|
|
/**
 * Checks whether the input carries any execution evidence.
 *
 * A path listed under `EVIDENCE_FIELDS.executionEvidence` counts when its
 * value is a non-blank string, an array with at least one element, or another
 * object with at least one own enumerable key.
 *
 * NOTE(review): no caller of this function is visible in this file.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when at least one listed path counts as evidence.
 */
function hasExecutionEvidence(input) {
  for (const fieldPath of EVIDENCE_FIELDS.executionEvidence) {
    const candidate = getPathValue(input, fieldPath);
    // Arrays are judged by length, other objects by their own keys.
    const present = hasNonEmptyString(candidate)
      || (Array.isArray(candidate)
        ? candidate.length > 0
        : Boolean(candidate) && typeof candidate === 'object' && Object.keys(candidate).length > 0);
    if (present) return true;
  }
  return false;
}
|
|
|
|
/**
 * Checks whether the input carries any concrete progress evidence.
 *
 * A path listed under `EVIDENCE_FIELDS.progressEvidence` counts when its value
 * is a non-blank string, an array with at least one element, or another object
 * with at least one own enumerable key.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when at least one listed path counts as evidence.
 */
function hasProgressEvidence(input) {
  for (const fieldPath of EVIDENCE_FIELDS.progressEvidence) {
    const candidate = getPathValue(input, fieldPath);
    // Arrays are judged by length, other objects by their own keys.
    const present = hasNonEmptyString(candidate)
      || (Array.isArray(candidate)
        ? candidate.length > 0
        : Boolean(candidate) && typeof candidate === 'object' && Object.keys(candidate).length > 0);
    if (present) return true;
  }
  return false;
}
|
|
|
|
/**
 * Checks whether the input claims that progress was made.
 *
 * Uses the same shared helper as the other string-evidence predicates instead
 * of collecting every candidate value and re-validating the one that was found.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when any path listed under
 *   `EVIDENCE_FIELDS.progressionClaim` holds a non-blank string.
 */
function claimsProgression(input) {
  return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.progressionClaim);
}
|
|
|
|
/**
 * Detects a progression claim that is not backed by progress evidence.
 *
 * @param {*} input - Gate input.
 * @returns {boolean} `true` when `claimsProgression` holds and
 *   `hasProgressEvidence` does not. Evidence is only checked when a claim is
 *   present.
 */
function claimsProgressionWithoutEvidence(input) {
  return claimsProgression(input) && !hasProgressEvidence(input);
}
|
|
|
|
/**
 * Evaluates the long-task gate for one input.
 *
 * Inputs that are not classified as a long task are reported as
 * `not_applicable`. Otherwise four rules are checked, in this order:
 *   1. silent continuation requires an externalized checkpoint path;
 *   2. claimed execution requires a concrete next action;
 *   3. a needed owner decision requires the button-path mode;
 *   4. a progression claim requires concrete progress evidence.
 * Every violated rule contributes a reason, a description of the missing
 * evidence and a permitted response mode. With no violation the gate passes
 * and allows `button_path` (owner decision needed) or `direct_reply`, plus
 * `silent_continuation` when silent continuation was requested.
 *
 * @param {object} input - Gate input.
 * @returns {{gateRequired: boolean, gateStatus: string, reasons: string[],
 *   requiredEvidence: object[], allowedResponseModes: string[]}} Gate verdict;
 *   `gateStatus` is `not_applicable`, `pass` or `fail`, and
 *   `allowedResponseModes` contains no duplicates.
 */
function evaluateGate(input) {
  if (!isLongTask(input)) {
    return {
      gateRequired: false,
      gateStatus: 'not_applicable',
      reasons: ['classification is not long_task'],
      requiredEvidence: [],
      allowedResponseModes: ['direct_reply'],
    };
  }

  // Rules are checked in declaration order; that order also fixes the order of
  // the reported reasons, evidence descriptions and response modes.
  const violations = [
    {
      isViolated: () => wantsSilentContinuation(input) && !hasExternalizedCheckpointPath(input),
      reason: 'silent long-task cannot continue without externalized checkpoint path',
      requirementKey: 'externalizedCheckpoint',
      responseMode: 'non_silent_follow_up',
    },
    {
      isViolated: () => claimsExecution(input) && !hasConcreteNextAction(input),
      reason: 'claimed execution requires evidence of a concrete next action',
      requirementKey: 'concreteNextAction',
      responseMode: 'checkpoint_only',
    },
    {
      isViolated: () => needsOwnerDecision(input) && !usesButtonPath(input),
      reason: 'owner decision flow must end in button-path, not plain text',
      requirementKey: 'buttonPathMode',
      responseMode: 'button_path',
    },
    {
      isViolated: () => claimsProgressionWithoutEvidence(input),
      reason: 'claimed progression without concrete progress evidence is forbidden',
      requirementKey: 'progressEvidence',
      responseMode: 'evidence_preserving_follow_up',
    },
  ].filter((rule) => rule.isViolated());

  const failed = violations.length > 0;
  const reasons = violations.map((rule) => rule.reason);
  const requiredEvidence = violations.map(
    (rule) => describeRequirement(GATE_REQUIREMENTS[rule.requirementKey]),
  );
  const allowedResponseModes = violations.map((rule) => rule.responseMode);

  if (!failed) {
    reasons.push('required long-task gate evidence is present or no gated condition was triggered');
    allowedResponseModes.push(needsOwnerDecision(input) ? 'button_path' : 'direct_reply');
    if (wantsSilentContinuation(input)) allowedResponseModes.push('silent_continuation');
  }

  return {
    gateRequired: true,
    gateStatus: failed ? 'fail' : 'pass',
    reasons,
    requiredEvidence,
    allowedResponseModes: [...new Set(allowedResponseModes)],
  };
}
|
|
|
|
/**
 * CLI entry point: parses the process arguments, reads and parses the JSON
 * input, evaluates the gate and writes the verdict to stdout as one JSON
 * document followed by a newline (2-space indentation unless `--compact` was
 * given).
 */
function main() {
  const { input: inputPath, pretty } = parseArgs(process.argv);
  const gateInput = parseJson(readInput(inputPath));
  const verdict = evaluateGate(gateInput);
  const indent = pretty ? 2 : 0;
  process.stdout.write(JSON.stringify(verdict, null, indent) + '\n');
}
|
|
|
|
export { evaluateGate };
|
|
|
|
/**
 * Tells whether `entryPath` and `moduleUrl` resolve to the same file on disk.
 *
 * Both sides go through `fs.realpathSync`, so symlinked launch paths still
 * match. Any resolution failure (no entry path, a path that does not exist, a
 * module URL that is not a `file:` URL) is treated as "not the entry point"
 * instead of throwing, so importing this module can never fail here.
 *
 * @param {string|undefined} entryPath - Script path the process was started
 *   with (`process.argv[1]`).
 * @param {string} moduleUrl - URL of this module (`import.meta.url`).
 * @returns {boolean} `true` only when both resolve to the same real path.
 */
function isEntryPoint(entryPath, moduleUrl) {
  if (!entryPath) return false;
  try {
    return fs.realpathSync(entryPath) === fs.realpathSync(new URL(moduleUrl));
  } catch {
    return false;
  }
}

// Run the CLI only when this file is the script being executed, not when it is
// imported for its exports.
const isDirectRun = isEntryPoint(process.argv[1], import.meta.url);

if (isDirectRun) {
  try {
    main();
  } catch (error) {
    // Anything that escapes main() is reported as a generic CLI error.
    fail('CLI_ERROR', error && error.message ? error.message : 'unexpected error');
  }
}
|