fix: require real progress evidence artifacts

This commit is contained in:
Eve
2026-04-24 07:29:56 +08:00
parent 5647a34294
commit 227755aa57
2 changed files with 39 additions and 13 deletions

View File

@@ -96,13 +96,36 @@ async function runLongTaskWrapper(workspaceDir: string, ctx: any): Promise<any |
} }
function buildProgressEvidence(wrapperResult: any): Record<string, unknown> | null { function buildProgressEvidence(wrapperResult: any): Record<string, unknown> | null {
const candidate = wrapperResult?.progressEvidence;
if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) {
return null;
}
const progressEvidence: Record<string, unknown> = {}; const progressEvidence: Record<string, unknown> = {};
const taskName = typeof wrapperResult?.taskRecord?.task_name === "string" const sessionKey = typeof candidate.sessionKey === "string"
? wrapperResult.taskRecord.task_name.trim() ? candidate.sessionKey.trim()
: ""; : "";
if (wrapperResult?.silentLaunchOk === true && taskName) { if (sessionKey) {
progressEvidence.sessionKey = taskName; progressEvidence.sessionKey = sessionKey;
}
const runId = typeof candidate.runId === "string"
? candidate.runId.trim()
: "";
if (runId) {
progressEvidence.runId = runId;
}
if (Array.isArray(candidate.modified_files) && candidate.modified_files.length > 0) {
progressEvidence.modified_files = candidate.modified_files;
}
const verificationResult = typeof candidate.verificationResult === "string"
? candidate.verificationResult.trim()
: "";
if (verificationResult) {
progressEvidence.verificationResult = verificationResult;
} }
return Object.keys(progressEvidence).length > 0 ? progressEvidence : null; return Object.keys(progressEvidence).length > 0 ? progressEvidence : null;
@@ -174,9 +197,8 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
const progressEvidenceReason = claimedProgression && !progressEvidence const progressEvidenceReason = claimedProgression && !progressEvidence
? "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result" ? "progression claim requires concrete evidence such as sessionKey, runId, modified_files, or verification result"
: ""; : "";
const hasExternalizedCheckpointEvidence = wrapperResult.silentLaunchOk === true const hasExternalizedCheckpointEvidence = typeof wrapperResult.externalizedCheckpointPath === "string"
&& typeof wrapperResult.taskRecord?.task_name === "string" && wrapperResult.externalizedCheckpointPath.trim().length > 0;
&& wrapperResult.taskRecord.task_name.trim().length > 0;
const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true; const hasButtonPathClosureEvidence = needsOwnerDecision && wrapperResult.silentLaunchOk === true;
return { return {
@@ -206,8 +228,8 @@ function buildGateLockInput(wrapperResult: any): Record<string, unknown> {
dispatchEvidence: "", dispatchEvidence: "",
fileChangeEvidence: "", fileChangeEvidence: "",
verificationEvidence: "", verificationEvidence: "",
checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "", checkpointArtifactEvidence: hasExternalizedCheckpointEvidence ? wrapperResult.externalizedCheckpointPath.trim() : "",
externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.taskRecord.task_name.trim() : "", externalizedCheckpointPath: hasExternalizedCheckpointEvidence ? wrapperResult.externalizedCheckpointPath.trim() : "",
externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "", externalizedTrigger: hasExternalizedCheckpointEvidence ? "hook-preflight-checkpoint" : "",
handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", handoffMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",
replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply", replyClosureMode: hasButtonPathClosureEvidence ? (wrapperResult.handoff?.mode ?? "button_path") : "direct_reply",

View File

@@ -209,7 +209,8 @@ async function main() {
dispatched: true, dispatched: true,
event: 'dispatch', event: 'dispatch',
}, },
taskRecord: { task_name: 'task-123' }, progressEvidence: { sessionKey: 'task-123' },
externalizedCheckpointPath: 'checkpoints/task-123.json',
handoff: { mode: 'direct_reply' }, handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, requestText)); }), async () => runScenario(forceRecall, requestText));
assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence'); assert.match(passInjected, /gateStatus=pass/, 'hook pass-path should pass when wrapper provides concrete progressEvidence');
@@ -285,7 +286,8 @@ async function main() {
dispatched: true, dispatched: true,
event: 'dispatch', event: 'dispatch',
}, },
taskRecord: { task_name: 'task-spec-review-missing-evidence' }, progressEvidence: { sessionKey: 'task-spec-review-missing-evidence' },
externalizedCheckpointPath: 'checkpoints/task-spec-review-missing-evidence.json',
handoff: { mode: 'direct_reply' }, handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, plannerOnlyRequestText)); }), async () => runScenario(forceRecall, plannerOnlyRequestText));
assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block'); assert.match(specReviewWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook spec-review missing-evidence path should emit auto-chain plan block');
@@ -310,7 +312,8 @@ async function main() {
dispatched: true, dispatched: true,
event: 'dispatch', event: 'dispatch',
}, },
taskRecord: { task_name: 'task-fix-slice-missing-evidence' }, progressEvidence: { sessionKey: 'task-fix-slice-missing-evidence' },
externalizedCheckpointPath: 'checkpoints/task-fix-slice-missing-evidence.json',
handoff: { mode: 'direct_reply' }, handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, plannerOnlyRequestText)); }), async () => runScenario(forceRecall, plannerOnlyRequestText));
assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block'); assert.match(fixSliceWithoutEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook fix-slice missing-evidence path should emit auto-chain plan block');
@@ -334,7 +337,8 @@ async function main() {
dispatched: true, dispatched: true,
event: 'dispatch', event: 'dispatch',
}, },
taskRecord: { task_name: 'task-implementation-missing-evidence' }, progressEvidence: { sessionKey: 'task-implementation-missing-evidence' },
externalizedCheckpointPath: 'checkpoints/task-implementation-missing-evidence.json',
handoff: { mode: 'direct_reply' }, handoff: { mode: 'direct_reply' },
}), async () => runScenario(forceRecall, plannerOnlyRequestText)); }), async () => runScenario(forceRecall, plannerOnlyRequestText));
assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block'); assert.match(specReviewWithoutImplementationEvidenceInjected, /\[LONG_TASK_AUTO_CHAIN_PLAN\]/, 'hook implementation missing-evidence path should emit auto-chain plan block');