fix: tighten auto-chain dispatch evidence
This commit is contained in:
@@ -98,23 +98,6 @@ function buildProgressEvidence(wrapperResult: any): Record<string, unknown> | nu
|
|||||||
return Object.keys(progressEvidence).length > 0 ? progressEvidence : null;
|
return Object.keys(progressEvidence).length > 0 ? progressEvidence : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Builds an auto-chain dispatch-evidence record from a wrapper result and
 * previously collected progress evidence.
 *
 * NOTE(review): in this diff rendering the function shows up only in the old
 * (left) column, so it looks like the commit removes it - confirm before
 * relying on it.
 *
 * @param wrapperResult - value whose string `requiredNextAction` (trimmed) becomes `action`.
 * @param progressEvidence - record whose string `sessionKey` (trimmed) becomes `sessionKey`; may be null.
 * @returns `{ action, sessionKey, dispatched: true }`, or null when either trimmed string is empty.
 */
function buildAutoChainDispatchEvidence(wrapperResult: any, progressEvidence: Record<string, unknown> | null): Record<string, unknown> | null {
|
|
||||||
// Non-string or missing sessionKey collapses to "" and is rejected by the guard below.
const taskName = typeof progressEvidence?.sessionKey === "string"
|
|
||||||
? progressEvidence.sessionKey.trim()
|
|
||||||
: "";
|
|
||||||
// Same normalisation for the wrapper's required next action.
const requiredNextAction = typeof wrapperResult?.requiredNextAction === "string"
|
|
||||||
? wrapperResult.requiredNextAction.trim()
|
|
||||||
: "";
|
|
||||||
|
|
||||||
// Both pieces are mandatory; otherwise no evidence record is produced.
if (!requiredNextAction || !taskName) return null;
|
|
||||||
|
|
||||||
return {
|
|
||||||
action: requiredNextAction,
|
|
||||||
sessionKey: taskName,
|
|
||||||
// Hard-coded: nothing in this function checks that a dispatch actually happened.
dispatched: true,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function shouldClaimProgression(wrapperResult: any, progressEvidence: Record<string, unknown> | null): boolean {
|
function shouldClaimProgression(wrapperResult: any, progressEvidence: Record<string, unknown> | null): boolean {
|
||||||
if (!wrapperResult || wrapperResult.classification !== "long_task") return false;
|
if (!wrapperResult || wrapperResult.classification !== "long_task") return false;
|
||||||
if (progressEvidence && Object.keys(progressEvidence).length > 0) return true;
|
if (progressEvidence && Object.keys(progressEvidence).length > 0) return true;
|
||||||
@@ -170,8 +153,11 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
|
|||||||
}
|
}
|
||||||
: null;
|
: null;
|
||||||
const autoChainDispatchEvidence = hasConcreteExecutionEvidence
|
const autoChainDispatchEvidence = hasConcreteExecutionEvidence
|
||||||
? buildAutoChainDispatchEvidence(wrapperResult, progressEvidence)
|
&& wrapperResult.autoChainDispatchEvidence
|
||||||
: null;
|
&& typeof wrapperResult.autoChainDispatchEvidence === "object"
|
||||||
|
&& !Array.isArray(wrapperResult.autoChainDispatchEvidence)
|
||||||
|
? wrapperResult.autoChainDispatchEvidence
|
||||||
|
: null;
|
||||||
const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence)
|
const claimedProgression = shouldClaimProgression(wrapperResult, progressEvidence)
|
||||||
? "already progressing to the next step in background"
|
? "already progressing to the next step in background"
|
||||||
: "";
|
: "";
|
||||||
|
|||||||
@@ -197,28 +197,85 @@ function hasExplicitAutoChainNextAction(input) {
|
|||||||
return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.autoChainNextAction);
|
return hasAnyNonEmptyString(input, EVIDENCE_FIELDS.autoChainNextAction);
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasAutoChainDispatchEvidence(input) {
|
function getExplicitAutoChainNextAction(input) {
|
||||||
return EVIDENCE_FIELDS.autoChainDispatchEvidence.some((fieldPath) => {
|
|
||||||
const value = getPathValue(input, fieldPath);
|
|
||||||
if (hasNonEmptyString(value)) return true;
|
|
||||||
if (Array.isArray(value)) return value.length > 0;
|
|
||||||
if (value && typeof value === 'object') {
|
|
||||||
if (typeof value.action === 'string' && value.action.trim().length > 0) return true;
|
|
||||||
if (typeof value.concreteNextAction === 'string' && value.concreteNextAction.trim().length > 0) return true;
|
|
||||||
if (typeof value.dispatched === 'boolean') return value.dispatched;
|
|
||||||
return Object.keys(value).length > 0;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function requiresAutoChainDispatchEvidence(input) {
|
|
||||||
if (!hasExplicitAutoChainNextAction(input)) return false;
|
|
||||||
const nextAction = EVIDENCE_FIELDS.autoChainNextAction
|
const nextAction = EVIDENCE_FIELDS.autoChainNextAction
|
||||||
.map((fieldPath) => getPathValue(input, fieldPath))
|
.map((fieldPath) => getPathValue(input, fieldPath))
|
||||||
.find((value) => hasNonEmptyString(value));
|
.find((value) => hasNonEmptyString(value));
|
||||||
if (!hasNonEmptyString(nextAction)) return false;
|
|
||||||
return /^([a-z]+_)+[a-z]+$/i.test(nextAction.trim());
|
return hasNonEmptyString(nextAction) ? nextAction.trim() : '';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether `action` names an executable dispatch action.
 *
 * @param action - candidate value; anything rejected by `hasNonEmptyString` yields false.
 * @returns true when the trimmed value is `dispatch_` followed by one or more
 *   underscore-separated alphanumeric segments (matched case-insensitively).
 */
function isExecutableDispatchAction(action) {
|
||||||
|
// Guard first so `.trim()` below is only reached for values accepted by hasNonEmptyString.
if (!hasNonEmptyString(action)) return false;
|
||||||
|
// Anchored at both ends: the whole trimmed value must be a dispatch_* identifier.
return /^dispatch_[a-z0-9]+(?:_[a-z0-9]+)*$/i.test(action.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Normalises a candidate value into a dispatch action name.
 *
 * @param value - candidate value of any type.
 * @returns the trimmed value when it passes `isExecutableDispatchAction`;
 *   otherwise the empty string (also for values rejected by `hasNonEmptyString`).
 */
function getNormalizedDispatchAction(value) {
|
||||||
|
if (!hasNonEmptyString(value)) return '';
|
||||||
|
// Trimming only; letter case is left untouched.
const normalized = value.trim();
|
||||||
|
return isExecutableDispatchAction(normalized) ? normalized : '';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decides whether `input` needs dispatch evidence for its explicit auto-chain
 * next action and, if so, whether matching evidence is present.
 *
 * @param input - gate input object read through `getPathValue` using the
 *   paths listed in `EVIDENCE_FIELDS.autoChainDispatchEvidence`.
 * @returns `{ required, matched }`:
 *   - `required` is false when the explicit next action is not an executable
 *     dispatch action (see `isExecutableDispatchAction`);
 *   - `matched` is true only when some evidence value names the same action.
 */
function getAutoChainDispatchEvidenceMatch(input) {
|
||||||
|
const nextAction = getExplicitAutoChainNextAction(input);
|
||||||
|
// Non-dispatch next actions never require dispatch evidence.
if (!isExecutableDispatchAction(nextAction)) return { required: false, matched: false };
|
||||||
|
|
||||||
|
for (const fieldPath of EVIDENCE_FIELDS.autoChainDispatchEvidence) {
|
||||||
|
const value = getPathValue(input, fieldPath);
|
||||||
|
// Skip any falsy evidence value (missing, null, empty string, 0, false).
if (!value) continue;
|
||||||
|
|
||||||
|
// String evidence: it must itself be the dispatched action name.
if (hasNonEmptyString(value)) {
|
||||||
|
const directMatch = getNormalizedDispatchAction(value);
|
||||||
|
// NOTE(review): exact-case comparison, although the dispatch_* pattern is
// matched case-insensitively - confirm that case mismatches should fail.
if (directMatch === nextAction) {
|
||||||
|
return { required: true, matched: true };
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only plain (non-array) objects are inspected below.
if (typeof value !== 'object' || Array.isArray(value)) continue;
|
||||||
|
|
||||||
|
// Properties that may carry the dispatched action name; values that are not
// executable dispatch actions normalise to '' and are filtered out.
const candidates = [
|
||||||
|
value.action,
|
||||||
|
value.dispatchedAction,
|
||||||
|
value.nextAction,
|
||||||
|
value.autoChainNextAction,
|
||||||
|
value.requiredNextAction,
|
||||||
|
value.concreteNextAction,
|
||||||
|
value.event,
|
||||||
|
value.type,
|
||||||
|
value.kind,
|
||||||
|
value.dispatchType,
|
||||||
|
value.dispatchAction,
|
||||||
|
]
|
||||||
|
.map((candidate) => getNormalizedDispatchAction(candidate))
|
||||||
|
.filter(Boolean);
|
||||||
|
|
||||||
|
// The object must also explicitly declare a dispatch: a strict `true` flag or
// the literal marker 'dispatch' in one of these properties.
const declaresDispatch = [
|
||||||
|
value.dispatched === true,
|
||||||
|
value.wasDispatched === true,
|
||||||
|
value.didDispatch === true,
|
||||||
|
value.dispatchEvent === true,
|
||||||
|
value.event === 'dispatch',
|
||||||
|
value.type === 'dispatch',
|
||||||
|
value.kind === 'dispatch',
|
||||||
|
value.dispatchType === 'dispatch',
|
||||||
|
].some(Boolean);
|
||||||
|
|
||||||
|
|
||||||
|
// Both conditions are needed: a declared dispatch AND a candidate equal to nextAction.
if (declaresDispatch && candidates.includes(nextAction)) {
|
||||||
|
return { required: true, matched: true };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Evidence was required but no field supplied a matching dispatch.
return { required: true, matched: false };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `input` carries dispatch evidence matching its explicit
 * auto-chain next action.
 *
 * @param input - gate input object, passed through unchanged.
 * @returns the `matched` flag computed by `getAutoChainDispatchEvidenceMatch`.
 */
function hasAutoChainDispatchEvidence(input) {
|
||||||
|
return getAutoChainDispatchEvidenceMatch(input).matched;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `input` must provide dispatch evidence.
 *
 * @param input - gate input object, passed through unchanged.
 * @returns the `required` flag computed by `getAutoChainDispatchEvidenceMatch`.
 */
function requiresAutoChainDispatchEvidence(input) {
|
||||||
|
return getAutoChainDispatchEvidenceMatch(input).required;
|
||||||
}
|
}
|
||||||
|
|
||||||
function hasProgressEvidence(input) {
|
function hasProgressEvidence(input) {
|
||||||
|
|||||||
@@ -107,7 +107,11 @@ async function main() {
|
|||||||
claimedExecution: true,
|
claimedExecution: true,
|
||||||
concreteNextAction: 'dispatch_follow_up_subagent',
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
autoChainNextAction: 'dispatch_follow_up_subagent',
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
autoChainDispatchEvidence: { sessionKey: 'task-123', dispatched: 'dispatch_follow_up_subagent' },
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_follow_up_subagent',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
progressionClaim: 'already progressing to the next step in background',
|
progressionClaim: 'already progressing to the next step in background',
|
||||||
progressEvidence: { sessionKey: 'task-123' },
|
progressEvidence: { sessionKey: 'task-123' },
|
||||||
});
|
});
|
||||||
@@ -142,6 +146,43 @@ async function main() {
|
|||||||
assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
|
assert.equal(directAutoChainFailResult.gateStatus, 'fail', 'direct evaluator should fail when explicit auto-chain action has no dispatch evidence');
|
||||||
assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');
|
assert.match(JSON.stringify(directAutoChainFailResult), /explicit auto-chain next action requires dispatched-action evidence/, 'direct evaluator fail-path should mention missing dispatched-action evidence');
|
||||||
|
|
||||||
|
const mismatchedDispatchEvidenceResult = evaluateGate({
|
||||||
|
classification: 'long_task',
|
||||||
|
claimedExecution: true,
|
||||||
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_other_subagent',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
assert.equal(mismatchedDispatchEvidenceResult.gateStatus, 'fail', 'mismatched dispatch evidence should fail');
|
||||||
|
assert.match(JSON.stringify(mismatchedDispatchEvidenceResult), /autoChainDispatchEvidence/, 'mismatched dispatch evidence should still require matching autoChainDispatchEvidence');
|
||||||
|
|
||||||
|
const fakeCheckpointDispatchEvidenceResult = evaluateGate({
|
||||||
|
classification: 'long_task',
|
||||||
|
claimedExecution: true,
|
||||||
|
concreteNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
sessionKey: 'task-123',
|
||||||
|
checkpointPath: 'checkpoints/task-123.json',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
assert.equal(fakeCheckpointDispatchEvidenceResult.gateStatus, 'fail', 'checkpoint/session-only dispatch evidence should fail');
|
||||||
|
assert.match(JSON.stringify(fakeCheckpointDispatchEvidenceResult), /explicit auto-chain next action requires dispatched-action evidence/, 'checkpoint/session-only dispatch evidence should be rejected as fake dispatch evidence');
|
||||||
|
|
||||||
|
const neutralSnakeCaseResult = evaluateGate({
|
||||||
|
classification: 'long_task',
|
||||||
|
claimedExecution: true,
|
||||||
|
concreteNextAction: 'summarize findings for reply',
|
||||||
|
autoChainNextAction: 'checkpoint_session_metadata_only',
|
||||||
|
executionEvidence: { concreteNextAction: 'summarize findings for reply' },
|
||||||
|
});
|
||||||
|
assert.equal(neutralSnakeCaseResult.gateStatus, 'pass', 'neutral snake_case non-dispatch action should not trigger dispatch-evidence requirement');
|
||||||
|
assert.doesNotMatch(JSON.stringify(neutralSnakeCaseResult), /autoChainDispatchEvidence/, 'neutral snake_case non-dispatch action should not mention dispatch-evidence requirement');
|
||||||
|
|
||||||
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
const passInjected = await withPatchedWrapper(buildWrapperScript({
|
||||||
classification: 'long_task',
|
classification: 'long_task',
|
||||||
silentCandidate: true,
|
silentCandidate: true,
|
||||||
@@ -151,6 +192,11 @@ async function main() {
|
|||||||
silentLaunchOk: true,
|
silentLaunchOk: true,
|
||||||
silentLaunchReason: 'checkpoint established',
|
silentLaunchReason: 'checkpoint established',
|
||||||
requiredNextAction: 'dispatch_follow_up_subagent',
|
requiredNextAction: 'dispatch_follow_up_subagent',
|
||||||
|
autoChainDispatchEvidence: {
|
||||||
|
action: 'dispatch_follow_up_subagent',
|
||||||
|
dispatched: true,
|
||||||
|
event: 'dispatch',
|
||||||
|
},
|
||||||
taskRecord: { task_name: 'task-123' },
|
taskRecord: { task_name: 'task-123' },
|
||||||
handoff: { mode: 'direct_reply' },
|
handoff: { mode: 'direct_reply' },
|
||||||
}), async () => runScenario(forceRecall, requestText));
|
}), async () => runScenario(forceRecall, requestText));
|
||||||
|
|||||||
Reference in New Issue
Block a user